summaryrefslogtreecommitdiff
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/acct/.gitignore3
-rw-r--r--tools/testing/selftests/acct/Makefile5
-rw-r--r--tools/testing/selftests/acct/taskstats_fill_stats_tgid.c375
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h7
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c96
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c175
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h1
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c123
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_jit_inline.c13
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c15
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c18
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c22
-rw-r--r--tools/testing/selftests/bpf/test_loader.c5
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c27
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h4
-rw-r--r--tools/testing/selftests/cgroup/test_core.c2
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c2
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh2
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c4
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c8
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c13
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c66
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c181
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py31
-rwxr-xr-xtools/testing/selftests/damon/drgn_dump_damon_status.py3
-rwxr-xr-xtools/testing/selftests/damon/sysfs.py60
-rwxr-xr-xtools/testing/selftests/damon/sysfs.sh62
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_vlan_real_dev.sh180
-rw-r--r--tools/testing/selftests/drivers/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/filelock/.gitignore1
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c94
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc14
-rw-r--r--tools/testing/selftests/hid/Makefile7
-rw-r--r--tools/testing/selftests/iommu/iommufd.c51
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h17
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh4
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm4
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic.c1
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_v5.c10
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c26
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c24
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h6
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/lru_gen_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/s390/facility.h6
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h29
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h24
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c28
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c25
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c2
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c12
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c7
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c138
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c6
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c110
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c31
-rw-r--r--tools/testing/selftests/kvm/x86/debug_regs.c88
-rw-r--r--tools/testing/selftests/kvm/x86/hwcr_msr_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c21
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c15
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tdp_fault_test.c313
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/sev_dbg_test.c118
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c14
-rw-r--r--tools/testing/selftests/kvm/x86/sev_migrate_tests.c2
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_pat_test.c196
-rw-r--r--tools/testing/selftests/kvm/x86/sync_regs_test.c1
-rw-r--r--tools/testing/selftests/landlock/audit.h140
-rw-r--r--tools/testing/selftests/landlock/audit_test.c33
-rw-r--r--tools/testing/selftests/landlock/base_test.c122
-rw-r--r--tools/testing/selftests/landlock/common.h2
-rw-r--r--tools/testing/selftests/landlock/fs_test.c2445
-rw-r--r--tools/testing/selftests/landlock/net_test.c1358
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c1
-rw-r--r--tools/testing/selftests/landlock/scoped_abstract_unix_test.c78
-rw-r--r--tools/testing/selftests/landlock/scoped_signal_test.c182
-rw-r--r--tools/testing/selftests/livepatch/functions.sh12
-rwxr-xr-xtools/testing/selftests/livepatch/test-kprobe.sh8
-rwxr-xr-xtools/testing/selftests/livepatch/test-sysfs.sh219
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c33
-rw-r--r--tools/testing/selftests/liveupdate/Makefile2
-rw-r--r--tools/testing/selftests/liveupdate/liveupdate.c188
-rw-r--r--tools/testing/selftests/liveupdate/luo_stress_files.c97
-rw-r--r--tools/testing/selftests/liveupdate/luo_stress_sessions.c102
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.c24
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.h2
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c2
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c4
-rw-r--r--tools/testing/selftests/mm/.gitignore4
-rw-r--r--tools/testing/selftests/mm/Makefile23
-rwxr-xr-xtools/testing/selftests/mm/charge_reserved_hugetlb.sh45
-rwxr-xr-xtools/testing/selftests/mm/check_config.sh2
-rw-r--r--tools/testing/selftests/mm/compaction_test.c118
-rw-r--r--tools/testing/selftests/mm/cow.c31
-rw-r--r--tools/testing/selftests/mm/droppable.c55
-rw-r--r--tools/testing/selftests/mm/folio_split_race_test.c25
-rw-r--r--tools/testing/selftests/mm/guard-regions.c20
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c5
-rw-r--r--tools/testing/selftests/mm/gup_test.c15
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c185
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c78
-rw-r--r--tools/testing/selftests/mm/hugepage_settings.c (renamed from tools/testing/selftests/mm/thp_settings.c)316
-rw-r--r--tools/testing/selftests/mm/hugepage_settings.h (renamed from tools/testing/selftests/mm/thp_settings.h)82
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c215
-rw-r--r--tools/testing/selftests/mm/hugetlb-mmap.c143
-rw-r--r--tools/testing/selftests/mm/hugetlb-mremap.c (renamed from tools/testing/selftests/mm/hugepage-mremap.c)47
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c121
-rw-r--r--tools/testing/selftests/mm/hugetlb-shm.c (renamed from tools/testing/selftests/mm/hugepage-shm.c)69
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c45
-rw-r--r--tools/testing/selftests/mm/hugetlb-vmemmap.c (renamed from tools/testing/selftests/mm/hugepage-vmemmap.c)46
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c15
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c9
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c22
-rwxr-xr-xtools/testing/selftests/mm/hugetlb_reparenting_test.sh60
-rw-r--r--tools/testing/selftests/mm/khugepaged.c575
-rwxr-xr-xtools/testing/selftests/mm/ksft_kmemleak_dedup.sh222
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c19
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c184
-rw-r--r--tools/testing/selftests/mm/madv_populate.c2
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c88
-rw-r--r--tools/testing/selftests/mm/migration.c147
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c84
-rw-r--r--tools/testing/selftests/mm/mremap_test.c109
-rw-r--r--tools/testing/selftests/mm/page_frag/page_frag_test.c2
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c13
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h15
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c2
-rw-r--r--tools/testing/selftests/mm/process_madv.c28
-rw-r--r--tools/testing/selftests/mm/protection_keys.c130
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh235
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c6
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c41
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c96
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c2
-rw-r--r--tools/testing/selftests/mm/uffd-common.h18
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c48
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c138
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c33
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c42
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh41
-rw-r--r--tools/testing/selftests/mm/vm_util.c136
-rw-r--r--tools/testing/selftests/mm/vm_util.h15
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh89
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh8
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh66
-rw-r--r--tools/testing/selftests/net/tls.c8
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c2
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c293
-rw-r--r--tools/testing/selftests/rdma/Makefile3
-rwxr-xr-xtools/testing/selftests/rdma/rxe_sent_rcvd_bytes.sh75
-rw-r--r--tools/testing/selftests/riscv/abi/Makefile2
-rw-r--r--tools/testing/selftests/riscv/cfi/Makefile2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/Makefile6
-rw-r--r--tools/testing/selftests/riscv/mm/Makefile2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/Makefile2
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile12
-rw-r--r--tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c7
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl.c54
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json38
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json44
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_gso.py43
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c2
-rw-r--r--tools/testing/selftests/vfio/Makefile11
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h5
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/sysfs.h12
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h11
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk7
-rw-r--r--tools/testing/selftests/vfio/lib/sysfs.c150
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c157
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c6
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c21
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_sriov_uapi_test.c217
187 files changed, 10937 insertions, 2852 deletions
diff --git a/tools/testing/selftests/acct/.gitignore b/tools/testing/selftests/acct/.gitignore
index 7e78aac19038..9e9c61c5bfd6 100644
--- a/tools/testing/selftests/acct/.gitignore
+++ b/tools/testing/selftests/acct/.gitignore
@@ -1,3 +1,4 @@
acct_syscall
+taskstats_fill_stats_tgid
config
-process_log \ No newline at end of file
+process_log
diff --git a/tools/testing/selftests/acct/Makefile b/tools/testing/selftests/acct/Makefile
index 7e025099cf65..083cab5ddb72 100644
--- a/tools/testing/selftests/acct/Makefile
+++ b/tools/testing/selftests/acct/Makefile
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := acct_syscall
+TEST_GEN_PROGS += taskstats_fill_stats_tgid
+
CFLAGS += -Wall
+LDLIBS += -lpthread
-include ../lib.mk \ No newline at end of file
+include ../lib.mk
diff --git a/tools/testing/selftests/acct/taskstats_fill_stats_tgid.c b/tools/testing/selftests/acct/taskstats_fill_stats_tgid.c
new file mode 100644
index 000000000000..d6cab4ae26f2
--- /dev/null
+++ b/tools/testing/selftests/acct/taskstats_fill_stats_tgid.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <linux/genetlink.h>
+#include <linux/netlink.h>
+#include <linux/taskstats.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+#ifndef NLA_ALIGN
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int)NLA_ALIGN(sizeof(struct nlattr)))
+#endif
+
+#define BUSY_NS (200ULL * 1000 * 1000)
+
+struct worker_ctx {
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ bool ready;
+ bool release;
+};
+
+static unsigned long busy_sink;
+
+static void *taskstats_nla_data(const struct nlattr *na)
+{
+ return (void *)((char *)na + NLA_HDRLEN);
+}
+
+static bool taskstats_nla_ok(const struct nlattr *na, int remaining)
+{
+ return remaining >= (int)sizeof(*na) &&
+ na->nla_len >= sizeof(*na) &&
+ na->nla_len <= remaining;
+}
+
+static struct nlattr *taskstats_nla_next(const struct nlattr *na, int *remaining)
+{
+ int aligned_len = NLA_ALIGN(na->nla_len);
+
+ *remaining -= aligned_len;
+ return (struct nlattr *)((char *)na + aligned_len);
+}
+
+static uint64_t timespec_diff_ns(const struct timespec *start,
+ const struct timespec *end)
+{
+ return (uint64_t)(end->tv_sec - start->tv_sec) * 1000000000ULL +
+ (uint64_t)(end->tv_nsec - start->tv_nsec);
+}
+
+static void burn_cpu_for_ns(uint64_t runtime_ns)
+{
+ struct timespec start, now;
+ unsigned long acc = 0;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &start)) {
+ perror("clock_gettime");
+ exit(EXIT_FAILURE);
+ }
+
+ do {
+ for (int i = 0; i < 100000; i++)
+ acc += i;
+ if (clock_gettime(CLOCK_MONOTONIC, &now)) {
+ perror("clock_gettime");
+ exit(EXIT_FAILURE);
+ }
+ } while (timespec_diff_ns(&start, &now) < runtime_ns);
+
+ busy_sink = acc;
+}
+
+static int netlink_open(void)
+{
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ .nl_pid = getpid(),
+ };
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (fd < 0)
+ return -errno;
+
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ int err = -errno;
+
+ close(fd);
+ return err;
+ }
+
+ return fd;
+}
+
+static int send_request(int fd, void *buf, size_t len)
+{
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+
+ if (sendto(fd, buf, len, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int get_family_id(int fd, const char *name)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct genlmsghdr genl;
+ char buf[256];
+ } req = { 0 };
+ char resp[8192];
+ struct nlmsghdr *nlh;
+ struct genlmsghdr *genl;
+ struct nlattr *na;
+ int len;
+ int rem;
+ int ret;
+
+ req.nlh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ req.nlh.nlmsg_type = GENL_ID_CTRL;
+ req.nlh.nlmsg_flags = NLM_F_REQUEST;
+ req.nlh.nlmsg_seq = 1;
+ req.nlh.nlmsg_pid = getpid();
+
+ req.genl.cmd = CTRL_CMD_GETFAMILY;
+ req.genl.version = 1;
+
+ na = (struct nlattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
+ na->nla_type = CTRL_ATTR_FAMILY_NAME;
+ na->nla_len = NLA_HDRLEN + strlen(name) + 1;
+ memcpy(taskstats_nla_data(na), name, strlen(name) + 1);
+ req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + NLA_ALIGN(na->nla_len);
+
+ ret = send_request(fd, &req, req.nlh.nlmsg_len);
+ if (ret)
+ return ret;
+
+ len = recv(fd, resp, sizeof(resp), 0);
+ if (len < 0)
+ return -errno;
+
+ for (nlh = (struct nlmsghdr *)resp; NLMSG_OK(nlh, len);
+ nlh = NLMSG_NEXT(nlh, len)) {
+ if (nlh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err = NLMSG_DATA(nlh);
+
+ return err->error ? err->error : -ENOENT;
+ }
+
+ genl = (struct genlmsghdr *)NLMSG_DATA(nlh);
+ rem = nlh->nlmsg_len - NLMSG_HDRLEN - GENL_HDRLEN;
+ na = (struct nlattr *)((char *)genl + GENL_HDRLEN);
+ while (taskstats_nla_ok(na, rem)) {
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID)
+ return *(uint16_t *)taskstats_nla_data(na);
+ na = taskstats_nla_next(na, &rem);
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int get_taskstats(int fd, int family_id, uint16_t attr_type, uint32_t id,
+ struct taskstats *stats)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct genlmsghdr genl;
+ char buf[256];
+ } req = { 0 };
+ char resp[16384];
+ struct nlmsghdr *nlh;
+ struct genlmsghdr *genl;
+ struct nlattr *na;
+ struct nlattr *nested;
+ int len;
+ int rem;
+ int nrem;
+ int ret;
+
+ memset(stats, 0, sizeof(*stats));
+
+ req.nlh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ req.nlh.nlmsg_type = family_id;
+ req.nlh.nlmsg_flags = NLM_F_REQUEST;
+ req.nlh.nlmsg_seq = 2;
+ req.nlh.nlmsg_pid = getpid();
+
+ req.genl.cmd = TASKSTATS_CMD_GET;
+ req.genl.version = 1;
+
+ na = (struct nlattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
+ na->nla_type = attr_type;
+ na->nla_len = NLA_HDRLEN + sizeof(id);
+ memcpy(taskstats_nla_data(na), &id, sizeof(id));
+ req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + NLA_ALIGN(na->nla_len);
+
+ ret = send_request(fd, &req, req.nlh.nlmsg_len);
+ if (ret)
+ return ret;
+
+ len = recv(fd, resp, sizeof(resp), 0);
+ if (len < 0)
+ return -errno;
+
+ for (nlh = (struct nlmsghdr *)resp; NLMSG_OK(nlh, len);
+ nlh = NLMSG_NEXT(nlh, len)) {
+ if (nlh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err = NLMSG_DATA(nlh);
+
+ return err->error ? err->error : -ENOENT;
+ }
+
+ genl = (struct genlmsghdr *)NLMSG_DATA(nlh);
+ rem = nlh->nlmsg_len - NLMSG_HDRLEN - GENL_HDRLEN;
+ na = (struct nlattr *)((char *)genl + GENL_HDRLEN);
+ while (taskstats_nla_ok(na, rem)) {
+ if (na->nla_type == TASKSTATS_TYPE_AGGR_PID ||
+ na->nla_type == TASKSTATS_TYPE_AGGR_TGID) {
+ nested = (struct nlattr *)taskstats_nla_data(na);
+ nrem = na->nla_len - NLA_HDRLEN;
+ while (taskstats_nla_ok(nested, nrem)) {
+ if (nested->nla_type == TASKSTATS_TYPE_STATS) {
+ memcpy(stats, taskstats_nla_data(nested),
+ sizeof(*stats));
+ return 0;
+ }
+ nested = taskstats_nla_next(nested, &nrem);
+ }
+ }
+ na = taskstats_nla_next(na, &rem);
+ }
+ }
+
+ return -ENOENT;
+}
+
+static uint64_t cpu_total(const struct taskstats *stats)
+{
+ return (uint64_t)stats->ac_utime + (uint64_t)stats->ac_stime;
+}
+
+static void print_stats(const char *label, const struct taskstats *stats)
+{
+ ksft_print_msg("%s: cpu_total=%llu nvcsw=%llu nivcsw=%llu\n",
+ label, (unsigned long long)cpu_total(stats),
+ (unsigned long long)stats->nvcsw,
+ (unsigned long long)stats->nivcsw);
+}
+
+static void *worker_thread(void *arg)
+{
+ struct worker_ctx *ctx = arg;
+
+ burn_cpu_for_ns(BUSY_NS);
+
+ pthread_mutex_lock(&ctx->lock);
+ ctx->ready = true;
+ pthread_cond_broadcast(&ctx->cond);
+ while (!ctx->release)
+ pthread_cond_wait(&ctx->cond, &ctx->lock);
+ pthread_mutex_unlock(&ctx->lock);
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct worker_ctx ctx = {
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ .cond = PTHREAD_COND_INITIALIZER,
+ };
+ struct taskstats before, after;
+ pthread_t thread;
+ pid_t tgid = getpid();
+ int family_id;
+ int fd;
+ int ret;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (geteuid())
+ ksft_exit_skip("taskstats_fill_stats_tgid needs root\n");
+
+ fd = netlink_open();
+ if (fd < 0)
+ ksft_exit_skip("failed to open generic netlink socket: %s\n",
+ strerror(-fd));
+
+ family_id = get_family_id(fd, TASKSTATS_GENL_NAME);
+ if (family_id < 0)
+ ksft_exit_skip("taskstats generic netlink family unavailable: %s\n",
+ strerror(-family_id));
+
+ /* Create worker thread that burns 200ms of CPU */
+ if (pthread_create(&thread, NULL, worker_thread, &ctx) != 0)
+ ksft_exit_fail_msg("pthread_create failed: %s\n", strerror(errno));
+
+ /* Wait for worker to finish generating activity */
+ pthread_mutex_lock(&ctx.lock);
+ while (!ctx.ready)
+ pthread_cond_wait(&ctx.cond, &ctx.lock);
+ pthread_mutex_unlock(&ctx.lock);
+
+ /*
+ * Snapshot A: TGID stats while worker is alive and sleeping.
+ * Contains main thread + worker contributions.
+ */
+ ret = get_taskstats(fd, family_id, TASKSTATS_CMD_ATTR_TGID, tgid, &before);
+ if (ret)
+ ksft_exit_fail_msg("TGID query before exit failed: %s\n",
+ strerror(-ret));
+
+ /* Release worker so it can exit, then join (deterministic wait).
+ *
+ * Kernel exit path ordering guarantees:
+ * do_exit()
+ * taskstats_exit() -> fill_tgid_exit() (accumulates worker into signal->stats)
+ * exit_notify() (releases the thread)
+ * do_task_dead() -> __schedule() (wakes joiner)
+ *
+ * So pthread_join() returns only after fill_tgid_exit() has completed.
+ */
+ pthread_mutex_lock(&ctx.lock);
+ ctx.release = true;
+ pthread_cond_broadcast(&ctx.cond);
+ pthread_mutex_unlock(&ctx.lock);
+
+ pthread_join(thread, NULL);
+
+ /*
+ * Snapshot B: TGID stats after worker has exited.
+ * fill_stats_for_tgid() does:
+ * memcpy(signal->stats) <- includes fill_tgid_exit accumulation
+ * + scan live threads <- only main thread now
+ */
+ ret = get_taskstats(fd, family_id, TASKSTATS_CMD_ATTR_TGID, tgid, &after);
+ if (ret)
+ ksft_exit_fail_msg("TGID query after exit failed: %s\n",
+ strerror(-ret));
+
+ print_stats("TGID before worker exit", &before);
+ print_stats("TGID after worker exit", &after);
+
+ /*
+ * The worker burned 200ms of CPU before the first snapshot.
+ * If the kernel correctly retained its contribution via
+ * fill_tgid_exit(), then the TGID CPU total after exit must be at
+ * least as large as the TGID CPU total before exit.
+ */
+ ksft_test_result(cpu_total(&after) >= cpu_total(&before),
+ "TGID CPU stats should not regress after thread exit\n");
+
+ close(fd);
+ ksft_finished();
+ return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
+}
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index d1db355e872b..67ff7882299e 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -423,6 +423,8 @@ static inline int get_preempt_count(void)
return bpf_get_current_task_btf()->thread_info.preempt_count;
#elif defined(bpf_target_s390)
return bpf_get_lowcore()->preempt_count;
+#elif defined(bpf_target_loongarch)
+ return bpf_get_current_task_btf()->thread_info.preempt_count;
#endif
return 0;
}
@@ -433,6 +435,7 @@ static inline int get_preempt_count(void)
* * arm64
* * powerpc64
* * s390x
+ * * loongarch
*/
static inline int bpf_in_interrupt(void)
{
@@ -454,6 +457,7 @@ static inline int bpf_in_interrupt(void)
* * arm64
* * powerpc64
* * s390x
+ * * loongarch
*/
static inline int bpf_in_nmi(void)
{
@@ -466,6 +470,7 @@ static inline int bpf_in_nmi(void)
* * arm64
* * powerpc64
* * s390x
+ * * loongarch
*/
static inline int bpf_in_hardirq(void)
{
@@ -478,6 +483,7 @@ static inline int bpf_in_hardirq(void)
* * arm64
* * powerpc64
* * s390x
+ * * loongarch
*/
static inline int bpf_in_serving_softirq(void)
{
@@ -498,6 +504,7 @@ static inline int bpf_in_serving_softirq(void)
* * arm64
* * powerpc64
* * s390x
+ * * loongarch
*/
static inline int bpf_in_task(void)
{
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index bac60b444551..adb25146e88c 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -45,13 +45,16 @@ CONFIG_IPV6=y
CONFIG_IPV6_FOU=y
CONFIG_IPV6_FOU_TUNNEL=y
CONFIG_IPV6_GRE=y
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
CONFIG_IPV6_SEG6_BPF=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPV6_SIT=y
CONFIG_IPV6_TUNNEL=y
CONFIG_KEYS=y
CONFIG_LIRC=y
CONFIG_LIVEPATCH=y
CONFIG_LWTUNNEL=y
+CONFIG_LWTUNNEL_BPF=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_UNLOAD=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index b33dba4b126e..14d4c1793aed 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -5,6 +5,8 @@
#include "test_bpf_nf.skel.h"
#include "test_bpf_nf_fail.skel.h"
+#define CT_OPTS_ERROR_GUARD 0x12345678
+
static char log_buf[1024 * 1024];
struct {
@@ -119,6 +121,10 @@ static void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0");
ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+ ASSERT_EQ(skel->bss->test_einval_len_opts_small_lookup, CT_OPTS_ERROR_GUARD,
+ "Test no error write for lookup opts__sz before error field");
+ ASSERT_EQ(skel->bss->test_einval_len_opts_small_alloc, CT_OPTS_ERROR_GUARD,
+ "Test no error write for alloc opts__sz before error field");
ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 96f719a0cec9..66855cbd6b73 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4121,8 +4121,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Type tags don't precede modifiers",
},
{
.descr = "type_tag test #3, type tag order",
@@ -4141,8 +4139,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Type tags don't precede modifiers",
},
{
.descr = "type_tag test #4, type tag order",
@@ -4161,8 +4157,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Type tags don't precede modifiers",
},
{
.descr = "type_tag test #5, type tag order",
@@ -4198,11 +4192,9 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "tag_type_check_btf",
.key_size = sizeof(int),
.value_size = 4,
- .key_type_id = 1,
- .value_type_id = 1,
+ .key_type_id = 4,
+ .value_type_id = 4,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Type tags don't precede modifiers",
},
{
.descr = "type_tag test #7, tag with kflag",
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
index d4d583872fa2..d2ccf409dfe3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -102,6 +102,82 @@ close_skel:
return err;
}
+/*
+ * Replacing a link's program (bpf_link_update) must target the correct slot in
+ * the effective array even when a BPF_F_PREORDER program is attached to the
+ * same cgroup. All programs here are attached to a single cgroup; "parent" is
+ * reused only as a third distinct program.
+ *
+ * Attach child(1) normally and child_2(2) with BPF_F_PREORDER, so the effective
+ * order is [2, 1]. Then replace child(1)'s program with parent(3): only the
+ * non-preorder slot changes, giving [2, 3].
+ */
+static int run_link_replace_test(int cgroup_fd, int sock_fd)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ int err = 0, normal_link = -1, preorder_link = -1;
+ struct cgroup_preorder *skel = NULL;
+ enum bpf_attach_type atype;
+ __u8 *result, buf = 0x00;
+ socklen_t optlen = 1;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return -1;
+
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ atype = bpf_program__expected_attach_type(skel->progs.child);
+
+ create_opts.flags = 0;
+ normal_link = bpf_link_create(bpf_program__fd(skel->progs.child),
+ cgroup_fd, atype, &create_opts);
+ if (!ASSERT_GE(normal_link, 0, "create_normal_link")) {
+ err = normal_link;
+ goto close_skel;
+ }
+
+ create_opts.flags = BPF_F_PREORDER;
+ preorder_link = bpf_link_create(bpf_program__fd(skel->progs.child_2),
+ cgroup_fd, atype, &create_opts);
+ if (!ASSERT_GE(preorder_link, 0, "create_preorder_link")) {
+ err = preorder_link;
+ goto close_links;
+ }
+
+ result = skel->bss->result;
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt-before"))
+ goto close_links;
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "order before update");
+
+ /* Replace the normal link's program child(1) -> parent(3). */
+ err = bpf_link_update(normal_link, bpf_program__fd(skel->progs.parent), NULL);
+ if (!ASSERT_OK(err, "bpf_link_update"))
+ goto close_links;
+
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt-after"))
+ goto close_links;
+ ASSERT_TRUE(result[0] == 2 && result[1] == 3, "order after update");
+
+close_links:
+ if (preorder_link >= 0)
+ close(preorder_link);
+ close(normal_link);
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
void test_cgroup_preorder(void)
{
int cg_parent = -1, cg_child = -1, sock_fd = -1;
@@ -120,6 +196,7 @@ void test_cgroup_preorder(void)
ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+ ASSERT_OK(run_link_replace_test(cg_child, sock_fd), "link_replace_test");
out:
close(sock_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
index 72875071d4f1..6eb9096d084c 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -7,7 +7,6 @@
#include <linux/netdev.h>
#include <poll.h>
#include <pthread.h>
-#include <signal.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
@@ -65,11 +64,6 @@ static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
}
-static bool is_umem_valid(struct xsk_socket_info *xsk)
-{
- return !!xsk->umem->umem;
-}
-
static u32 mode_to_xdp_flags(enum test_mode mode)
{
return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
@@ -1010,7 +1004,7 @@ static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
return TEST_FAILURE;
if (!ret) {
- if (!is_umem_valid(test->ifobj_tx->xsk))
+ if (test->poll_tmout)
return TEST_PASS;
ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
@@ -1149,7 +1143,7 @@ static int receive_pkts(struct test_spec *test)
break;
res = __receive_pkts(test, xsk);
- if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ if (res != TEST_CONTINUE)
return res;
ret = gettimeofday(&tv_now, NULL);
@@ -1166,7 +1160,8 @@ static int receive_pkts(struct test_spec *test)
return TEST_PASS;
}
-static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk,
+ bool test_timeout)
{
u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
struct pkt_stream *pkt_stream = xsk->pkt_stream;
@@ -1178,7 +1173,7 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
/* pkts_in_flight might be negative if many invalid packets are sent */
if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
- buffer_len)) {
+ buffer_len) && !test_timeout) {
ret = kick_tx(xsk);
if (ret)
return TEST_FAILURE;
@@ -1191,7 +1186,7 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
if (use_poll) {
ret = poll(&fds, 1, POLL_TMOUT);
- if (timeout) {
+ if (test_timeout) {
if (ret < 0) {
ksft_print_msg("ERROR: [%s] Poll error %d\n",
__func__, errno);
@@ -1271,7 +1266,7 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
if (use_poll) {
ret = poll(&fds, 1, POLL_TMOUT);
if (ret <= 0) {
- if (ret == 0 && timeout)
+ if (ret == 0 && test_timeout)
return TEST_PASS;
ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
@@ -1279,14 +1274,14 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
}
}
- if (!timeout) {
+ if (!test_timeout) {
if (complete_pkts(xsk, i))
return TEST_FAILURE;
usleep(10);
- return TEST_PASS;
}
+ /* Loop completion is driven by send_pkts() stream progress checks. */
return TEST_CONTINUE;
}
@@ -1322,7 +1317,6 @@ bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
{
- bool timeout = !is_umem_valid(test->ifobj_rx->xsk);
DECLARE_BITMAP(bitmap, test->nb_sockets);
u32 i, ret;
@@ -1337,19 +1331,18 @@ static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
__set_bit(i, bitmap);
continue;
}
- ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
- if (ret == TEST_CONTINUE && !test->fail)
- continue;
-
- if ((ret || test->fail) && !timeout)
- return TEST_FAILURE;
-
- if (ret == TEST_PASS && timeout)
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], test->poll_tmout);
+ if (ret != TEST_CONTINUE)
return ret;
- ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
- if (ret)
+ if (test->fail)
return TEST_FAILURE;
+
+ if (!test->poll_tmout) {
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
}
}
@@ -1677,7 +1670,8 @@ void *worker_testapp_validate_rx(void *arg)
strerror(-err));
}
- pthread_barrier_wait(&barr);
+ if (test->use_barrier)
+ pthread_barrier_wait(&barr);
/* We leave only now in case of error to avoid getting stuck in the barrier */
if (err) {
@@ -1716,11 +1710,6 @@ static void testapp_clean_xsk_umem(struct ifobject *ifobj)
munmap(umem->buffer, umem->mmap_size);
}
-static void handler(int signum)
-{
- pthread_exit(NULL);
-}
-
static bool xdp_prog_changed_rx(struct test_spec *test)
{
struct ifobject *ifobj = test->ifobj_rx;
@@ -1825,9 +1814,18 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
return TEST_FAILURE;
}
- if (ifobj2) {
+ err = xsk_attach_xdp_progs(test, ifobj1, ifobj2);
+ if (err) {
+ ksft_print_msg("Error: failed to attach XDP programs: %d (%s)\n",
+ err, strerror(-err));
+ return TEST_FAILURE;
+ }
+ test->use_barrier = !!ifobj2;
+
+ if (test->use_barrier) {
if (pthread_barrier_init(&barr, NULL, 2))
return TEST_FAILURE;
+
pkt_stream_reset(ifobj2->xsk->pkt_stream);
}
@@ -1835,27 +1833,26 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
pkt_stream_reset(ifobj1->xsk->pkt_stream);
pkts_in_flight = 0;
- signal(SIGUSR1, handler);
/*Spawn RX thread */
pthread_create(&t0, NULL, ifobj1->func_ptr, test);
- if (ifobj2) {
+ if (test->use_barrier) {
pthread_barrier_wait(&barr);
if (pthread_barrier_destroy(&barr)) {
- pthread_kill(t0, SIGUSR1);
+ test->use_barrier = false;
+ pthread_join(t0, NULL);
clean_sockets(test, ifobj1);
clean_umem(test, ifobj1, NULL);
return TEST_FAILURE;
}
+ }
+ if (ifobj2) {
/*Spawn TX thread */
pthread_create(&t1, NULL, ifobj2->func_ptr, test);
-
pthread_join(t1, NULL);
}
- if (!ifobj2)
- pthread_kill(t0, SIGUSR1);
pthread_join(t0, NULL);
if (test->total_steps == test->current_step || test->fail) {
@@ -1893,8 +1890,6 @@ static int testapp_validate_traffic(struct test_spec *test)
}
}
- if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx))
- return TEST_FAILURE;
return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
}
@@ -2231,16 +2226,33 @@ int testapp_xdp_shared_umem(struct test_spec *test)
int testapp_poll_txq_tmout(struct test_spec *test)
{
+ bool shared_umem = test->ifobj_tx->shared_umem;
+ int ret;
+
+ test->poll_tmout = true;
+ /*
+ * POLL_TXQ_FULL exercises TX timeout setup in isolation.
+ * Keep TX out of shared-UMEM mode here so TX setup does not require
+ * RX UMEM to be initialized first.
+ */
+ test->ifobj_tx->shared_umem = false;
test->ifobj_tx->use_poll = true;
/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
test->ifobj_tx->xsk->umem->frame_size = 2048;
- if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048))
+ if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048)) {
+ test->ifobj_tx->shared_umem = shared_umem;
return TEST_FAILURE;
- return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+ }
+
+ ret = testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+ test->ifobj_tx->shared_umem = shared_umem;
+
+ return ret;
}
int testapp_poll_rxq_tmout(struct test_spec *test)
{
+ test->poll_tmout = true;
test->ifobj_rx->use_poll = true;
return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
index 4313d0d87235..03753ddc5dcd 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xsk.h
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
@@ -207,6 +207,8 @@ struct test_spec {
bool set_ring;
bool adjust_tail;
bool adjust_tail_support;
+ bool poll_tmout;
+ bool use_barrier;
enum test_mode mode;
char name[MAX_TEST_NAME_SIZE];
};
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 26159e0499c7..448807676176 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include <linux/ipv6.h>
+#include <arpa/inet.h>
#include "test_xdp_context_test_run.skel.h"
#include "test_xdp_meta.skel.h"
@@ -8,9 +10,12 @@
#define TX_NAME "veth1"
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
+#define RX_MAC "02:00:00:00:00:01"
+#define TX_MAC "02:00:00:00:00:02"
#define TAP_NAME "tap0"
#define DUMMY_NAME "dum0"
#define TAP_NETNS "xdp_context_tuntap"
+#define LWT_NETNS "xdp_context_lwt"
#define TEST_PAYLOAD_LEN 32
static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
@@ -187,6 +192,42 @@ static int write_test_packet(int tap_fd)
return 0;
}
+/* Inject Ethernet+IPv6+UDP frame into TAP */
+static int write_test_packet_udp(int tap_fd)
+{
+ __u8 pkt[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
+ sizeof(struct udphdr) + TEST_PAYLOAD_LEN] = {};
+ struct ethhdr *eth = (void *)pkt;
+ struct ipv6hdr *ip6 = (void *)(eth + 1);
+ struct udphdr *udp = (void *)(ip6 + 1);
+ __u8 *payload = (void *)(udp + 1);
+ const __u8 tap_mac[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
+ int n;
+
+ memcpy(eth->h_dest, tap_mac, ETH_ALEN);
+ eth->h_proto = htons(ETH_P_IPV6);
+
+ ip6->version = 6;
+ ip6->hop_limit = 64;
+ ip6->nexthdr = IPPROTO_UDP;
+ ip6->payload_len = htons(sizeof(*udp) + TEST_PAYLOAD_LEN);
+ inet_pton(AF_INET6, "fd00::2", &ip6->saddr);
+ inet_pton(AF_INET6, "fd00:1::1", &ip6->daddr);
+
+ udp->source = htons(42);
+ udp->dest = htons(42);
+ udp->len = htons(sizeof(*udp) + TEST_PAYLOAD_LEN);
+ /* UDP checksum is not validated on the forwarding path. */
+
+ memcpy(payload, test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, pkt, sizeof(pkt));
+ if (!ASSERT_EQ(n, sizeof(pkt), "write frame"))
+ return -1;
+
+ return 0;
+}
+
static void dump_err_stream(const struct bpf_program *prog)
{
char buf[512];
@@ -518,3 +559,137 @@ void test_xdp_context_tuntap(void)
test_xdp_meta__destroy(skel);
}
+
+/*
+ * Test topology:
+ *
+ * tap0 fd00::1
+ * RX: injected IPv6 UDP frame, XDP ingress sets metadata
+ * fwd: encap route prepends outer header(s)
+ * TX: TC egress validates metadata
+ *
+ * A routable IPv6 UDP frame is written into the tap fd, so it enters the RX
+ * path where XDP stores metadata. Routing then forwards it back out the same
+ * tap through an encapsulating route that prepends outer header(s). The TC
+ * egress program checks that the pushed header did not silently corrupt
+ * metadata.
+ */
+#define LWT_PIN_PATH "/sys/fs/bpf/xdp_context_lwt_xmit"
+
+enum lwt_encap_type {
+ LWT_ENCAP_BPF,
+ LWT_ENCAP_MPLS,
+ LWT_ENCAP_SEG6,
+ LWT_ENCAP_IOAM6,
+};
+
+static void test_lwt_encap(struct test_xdp_meta *skel,
+ enum lwt_encap_type type)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct bpf_program *lwt_prog = NULL;
+ struct netns_obj *ns = NULL;
+ const char *encap;
+ bool pinned = false;
+ int tap_ifindex;
+ int tap_fd = -1;
+ int ret;
+
+ skel->bss->test_pass = false;
+
+ switch (type) {
+ case LWT_ENCAP_BPF:
+ encap = "encap bpf xmit pinned " LWT_PIN_PATH " via fd00::2";
+ lwt_prog = skel->progs.dummy_lwt_xmit;
+ break;
+ case LWT_ENCAP_MPLS:
+ encap = "encap mpls 100 via inet6 fd00::2";
+ break;
+ case LWT_ENCAP_SEG6:
+ encap = "encap seg6 mode encap segs fd00::2";
+ break;
+ case LWT_ENCAP_IOAM6:
+ encap = "encap ioam6 mode encap tundst fd00::2 "
+ "trace prealloc type 0x800000 ns 0 size 4 via fd00::2";
+ break;
+ default:
+ return;
+ }
+
+ if (lwt_prog) {
+ unlink(LWT_PIN_PATH);
+ ret = bpf_program__pin(lwt_prog, LWT_PIN_PATH);
+ if (!ASSERT_OK(ret, "pin lwt prog"))
+ return;
+ pinned = true;
+ }
+
+ ns = netns_new(LWT_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ goto close;
+
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " address " RX_MAC);
+ SYS(close, "sysctl -wq net.ipv6.conf.all.forwarding=1");
+ SYS(close, "ip addr add fd00::1/64 dev " TAP_NAME " nodad");
+ SYS(close, "ip link set dev " TAP_NAME " up");
+ SYS(close, "ip neigh add fd00::2 lladdr " TX_MAC " nud permanent dev " TAP_NAME);
+ SYS(close, "ip -6 route add fd00:1::/64 %s dev %s", encap, TAP_NAME);
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ tc_hook.ifindex = tap_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc_is_meta_empty);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ ret = write_test_packet_udp(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet_udp"))
+ goto close;
+
+ if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
+ dump_err_stream(skel->progs.tc_is_meta_empty);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+ if (pinned)
+ unlink(LWT_PIN_PATH);
+}
+
+void test_xdp_context_lwt_encap(void)
+{
+ struct test_xdp_meta *skel;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("bpf_encap"))
+ test_lwt_encap(skel, LWT_ENCAP_BPF);
+ if (test__start_subtest("mpls_encap"))
+ test_lwt_encap(skel, LWT_ENCAP_MPLS);
+ if (test__start_subtest("seg6_encap"))
+ test_lwt_encap(skel, LWT_ENCAP_SEG6);
+ if (test__start_subtest("ioam6_encap"))
+ test_lwt_encap(skel, LWT_ENCAP_IOAM6);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 9eeb5b0b63d6..b0c441384f20 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -158,6 +158,7 @@
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __arch_s390x __arch("s390x")
+#define __arch_loongarch __arch("LOONGARCH")
#define __caps_unpriv(caps) __test_tag("test_caps_unpriv=" EXPAND_QUOTE(caps))
#define __load_if_JITed() __test_tag("load_mode=jited")
#define __load_if_no_JITed() __test_tag("load_mode=no_jited")
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 076fbf03a126..df43649ecb78 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -10,6 +10,8 @@
#define EINVAL 22
#define ENOENT 2
+#define CT_OPTS_ERROR_GUARD 0x12345678
+
#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL)
#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY)
@@ -19,6 +21,8 @@ int test_einval_reserved = 0;
int test_einval_reserved_new = 0;
int test_einval_netns_id = 0;
int test_einval_len_opts = 0;
+int test_einval_len_opts_small_lookup = 0;
+int test_einval_len_opts_small_alloc = 0;
int test_eproto_l4proto = 0;
int test_enonet_netns_id = 0;
int test_enoent_lookup = 0;
@@ -124,6 +128,28 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
else
test_einval_len_opts = opts_def.error;
+ opts_def.error = CT_OPTS_ERROR_GUARD;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def.netns_id));
+ if (ct) {
+ bpf_ct_release(ct);
+ test_einval_len_opts_small_lookup = -EINVAL;
+ } else {
+ test_einval_len_opts_small_lookup = opts_def.error;
+ }
+
+ opts_def.error = CT_OPTS_ERROR_GUARD;
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def.netns_id));
+ if (ct) {
+ ct = bpf_ct_insert_entry(ct);
+ if (ct)
+ bpf_ct_release(ct);
+ test_einval_len_opts_small_alloc = -EINVAL;
+ } else {
+ test_einval_len_opts_small_alloc = opts_def.error;
+ }
+
opts_def.l4proto = IPPROTO_ICMP;
ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
sizeof(opts_def));
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index fa73b17cb999..08b03be0b891 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -21,10 +21,6 @@
bool test_pass;
-static const __u8 smac_want[ETH_ALEN] = {
- 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
-};
-
static const __u8 meta_want[META_SIZE] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
@@ -32,11 +28,6 @@ static const __u8 meta_want[META_SIZE] = {
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
};
-static bool check_smac(const struct ethhdr *eth)
-{
- return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
-}
-
static bool check_metadata(const char *file, int line, __u8 *meta_have)
{
if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
@@ -280,18 +271,47 @@ fail:
return TC_ACT_SHOT;
}
+/* Test packets carry test metadata pattern as payload. */
+static bool is_test_packet_xdp(struct xdp_md *ctx)
+{
+ __u8 meta_have[META_SIZE];
+ __u32 len;
+
+ len = bpf_xdp_get_buff_len(ctx);
+ if (len < META_SIZE)
+ return false;
+ if (bpf_xdp_load_bytes(ctx, len - META_SIZE, meta_have, META_SIZE))
+ return false;
+ if (__builtin_memcmp(meta_have, meta_want, META_SIZE))
+ return false;
+
+ return true;
+}
+
+/* Test packets carry test metadata pattern as payload. */
+static bool is_test_packet_tc(struct __sk_buff *ctx)
+{
+ __u8 meta_have[META_SIZE];
+
+ if (ctx->len < META_SIZE)
+ return false;
+ if (bpf_skb_load_bytes(ctx, ctx->len - META_SIZE, meta_have, META_SIZE))
+ return false;
+ if (__builtin_memcmp(meta_have, meta_want, META_SIZE))
+ return false;
+
+ return true;
+}
+
/* Reserve and clear space for metadata but don't populate it */
SEC("xdp")
int ing_xdp_zalloc_meta(struct xdp_md *ctx)
{
- struct ethhdr *eth = ctx_ptr(ctx, data);
__u8 *meta;
int ret;
/* Drop any non-test packets */
- if (eth + 1 > ctx_ptr(ctx, data_end))
- return XDP_DROP;
- if (!check_smac(eth))
+ if (!is_test_packet_xdp(ctx))
return XDP_DROP;
ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -310,33 +330,24 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx)
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
- __u8 *data, *data_meta, *data_end, *payload;
- struct ethhdr *eth;
+ __u8 *data, *data_meta;
int ret;
+ /* Drop any non-test packets */
+ if (!is_test_packet_xdp(ctx))
+ return XDP_DROP;
+
ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
if (ret < 0)
return XDP_DROP;
data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- eth = (struct ethhdr *)data;
- payload = data + sizeof(struct ethhdr);
-
- if (payload + META_SIZE > data_end ||
- data_meta + META_SIZE > data)
+ if (data_meta + META_SIZE > data)
return XDP_DROP;
- /* The Linux networking stack may send other packets on the test
- * interface that interfere with the test. Just drop them.
- * The test packets can be recognized by their source MAC address.
- */
- if (!check_smac(eth))
- return XDP_DROP;
-
- __builtin_memcpy(data_meta, payload, META_SIZE);
+ __builtin_memcpy(data_meta, meta_want, META_SIZE);
return XDP_PASS;
}
@@ -353,7 +364,7 @@ int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
if (eth + 1 > ctx_ptr(ctx, data_end))
goto out;
/* Ignore non-test packets */
- if (!check_smac(eth))
+ if (!is_test_packet_tc(ctx))
goto out;
if (meta_have + META_SIZE > eth)
@@ -383,7 +394,7 @@ int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
if (eth + 1 > ctx_ptr(ctx, data_end))
goto out;
/* Ignore non-test packets */
- if (!check_smac(eth))
+ if (!is_test_packet_tc(ctx))
goto out;
if (meta_have + META_SIZE > eth)
@@ -416,7 +427,7 @@ int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
if (!eth)
goto out;
/* Ignore non-test packets */
- if (!check_smac(eth))
+ if (!is_test_packet_tc(ctx))
goto out;
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
@@ -436,16 +447,11 @@ out:
SEC("tc")
int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
{
- struct bpf_dynptr data, meta;
- const struct ethhdr *eth;
+ struct bpf_dynptr meta;
__u8 *meta_have;
- bpf_dynptr_from_skb(ctx, 0, &data);
- eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
- if (!eth)
- goto out;
/* Ignore non-test packets */
- if (!check_smac(eth))
+ if (!is_test_packet_tc(ctx))
goto out;
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
@@ -471,15 +477,10 @@ int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
{
struct bpf_dynptr data, meta;
__u8 meta_have[META_SIZE];
- const struct ethhdr *eth;
int err;
- bpf_dynptr_from_skb(ctx, 0, &data);
- eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
- if (!eth)
- goto out;
/* Ignore non-test packets */
- if (!check_smac(eth))
+ if (!is_test_packet_tc(ctx))
goto out;
/* Expect read-write metadata before unclone */
@@ -492,6 +493,7 @@ int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
goto out;
/* Helper write to payload will unclone the packet */
+ bpf_dynptr_from_skb(ctx, 0, &data);
bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
@@ -511,17 +513,12 @@ out:
SEC("tc")
int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
{
- struct bpf_dynptr data, meta;
+ struct bpf_dynptr meta;
__u8 meta_have[META_SIZE];
- const struct ethhdr *eth;
int err;
- bpf_dynptr_from_skb(ctx, 0, &data);
- eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
- if (!eth)
- goto out;
/* Ignore non-test packets */
- if (!check_smac(eth))
+ if (!is_test_packet_tc(ctx))
goto out;
/* Expect read-write metadata before unclone */
@@ -545,6 +542,28 @@ out:
return TC_ACT_SHOT;
}
+SEC("lwt_xmit")
+int dummy_lwt_xmit(struct __sk_buff *ctx)
+{
+ if (bpf_skb_change_head(ctx, sizeof(struct ipv6hdr), 0))
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+SEC("tc")
+int tc_is_meta_empty(struct __sk_buff *ctx)
+{
+ if (!is_test_packet_tc(ctx))
+ return TC_ACT_OK;
+
+ if (ctx->data_meta != ctx->data)
+ return TC_ACT_OK;
+
+ test_pass = true;
+ return TC_ACT_OK;
+}
+
SEC("tc")
int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
index 76d80605ec7f..02e562f56f9d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
+++ b/tools/testing/selftests/bpf/progs/verifier_jit_inline.c
@@ -12,6 +12,8 @@ __arch_arm64
__jited(" mrs x8, SP_EL0")
__arch_riscv64
__jited(" mv a5, tp")
+__arch_loongarch
+__jited(" move $a5, $tp")
int inline_bpf_get_current_task(void)
{
bpf_get_current_task();
@@ -19,4 +21,15 @@ int inline_bpf_get_current_task(void)
return 0;
}
+SEC("fentry/bpf_fentry_test2")
+__success __retval(0)
+__arch_loongarch
+__jited(" ld.wu $a5, $tp, 16")
+int inline_bpf_get_smp_processor_id(void)
+{
+ bpf_get_smp_processor_id();
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
index 2f8103bfa14e..c724bf389f5c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_lsm.c
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -197,4 +197,19 @@ int BPF_PROG(sleepable_lsm_cgroup)
return 0;
}
+SEC("lsm/file_mprotect")
+__description("lsm retval load must reset stale register bounds")
+__failure __msg("div by zero")
+__naked int retval_load_resets_bounds(void *ctx)
+{
+ asm volatile (
+ "r6 = 0;"
+ "r6 = *(u64 *)(r1 + 24);"
+ "if r6 == 0 goto +1;"
+ "r6 /= 0;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 6bc721accbae..0174887e28f5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -1359,4 +1359,22 @@ __naked void var_off_write_over_scalar_spill(void)
: __clobber_all);
}
+SEC("socket")
+__description("partial fill from cleaned pointer spill")
+__failure
+__log_level(2)
+__msg("1: (05) goto pc+0")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)")
+__msg("invalid size of register fill")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void partial_fill_from_cleaned_pointer_spill(void)
+{
+ /* Spill R1(ctx), then force a checkpoint and half-slot cleanup. */
+ asm volatile ("*(u64 *)(r10 - 8) = r1;"
+ "goto +0;"
+ "r0 = *(u32 *)(r10 - 4);"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index c16f8382cf17..49f7bd05edad 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -976,4 +976,26 @@ l0_%=: exit; \
: __clobber_all);
}
+SEC("socket")
+__description("unpriv: Spectre v4 stack write slot index")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V4
+__xlated_unpriv("r0 = 0")
+__xlated_unpriv("*(u32 *)(r10 -4) = r0")
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u32 *)(r10 -8) = r0")
+__xlated_unpriv("nospec")
+__xlated_unpriv("exit")
+#endif
+__naked void stack_write_nospec_slot_index(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u32 *)(r10 - 4) = r0; \
+ *(u32 *)(r10 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index abdb9e6e3713..3ce32d134e2c 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -377,6 +377,7 @@ enum arch {
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
ARCH_S390X = 0x10,
+ ARCH_LOONGARCH = 0x20,
};
static int get_current_arch(void)
@@ -389,6 +390,8 @@ static int get_current_arch(void)
return ARCH_RISCV64;
#elif defined(__s390x__)
return ARCH_S390X;
+#elif defined(__loongarch__)
+ return ARCH_LOONGARCH;
#endif
return ARCH_UNKNOWN;
}
@@ -580,6 +583,8 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_RISCV64;
} else if (strcmp(val, "s390x") == 0) {
arch = ARCH_S390X;
+ } else if (strcmp(val, "LOONGARCH") == 0) {
+ arch = ARCH_LOONGARCH;
} else {
PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 42f54936f4bb..2596c12cd864 100644
--- a/tools/testing/selftests/cgroup/lib/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -59,7 +59,8 @@ char *cg_name(const char *root, const char *name)
size_t len = strlen(root) + strlen(name) + 2;
char *ret = malloc(len);
- snprintf(ret, len, "%s/%s", root, name);
+ if (ret)
+ snprintf(ret, len, "%s/%s", root, name);
return ret;
}
@@ -69,7 +70,8 @@ char *cg_name_indexed(const char *root, const char *name, int index)
size_t len = strlen(root) + strlen(name) + 10;
char *ret = malloc(len);
- snprintf(ret, len, "%s/%s_%d", root, name, index);
+ if (ret)
+ snprintf(ret, len, "%s/%s_%d", root, name, index);
return ret;
}
@@ -79,7 +81,8 @@ char *cg_control(const char *cgroup, const char *control)
size_t len = strlen(cgroup) + strlen(control) + 2;
char *ret = malloc(len);
- snprintf(ret, len, "%s/%s", cgroup, control);
+ if (ret)
+ snprintf(ret, len, "%s/%s", cgroup, control);
return ret;
}
@@ -141,7 +144,7 @@ int cg_read_strcmp_wait(const char *cgroup, const char *control,
int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
if (cg_read(cgroup, control, buf, sizeof(buf)))
return -1;
@@ -171,7 +174,7 @@ long cg_read_long_fd(int fd)
long cg_read_key_long(const char *cgroup, const char *control, const char *key)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
char *ptr;
if (cg_read(cgroup, control, buf, sizeof(buf)))
@@ -207,7 +210,7 @@ long cg_read_key_long_poll(const char *cgroup, const char *control,
long cg_read_lc(const char *cgroup, const char *control)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
const char delim[] = "\n";
char *line;
long cnt = 0;
@@ -259,7 +262,7 @@ int cg_write_numeric(const char *cgroup, const char *control, long value)
static int cg_find_root(char *root, size_t len, const char *controller,
bool *nsdelegate)
{
- char buf[10 * PAGE_SIZE];
+ char buf[10 * BUF_SIZE];
char *fs, *mount, *type, *options;
const char delim[] = "\n\t ";
@@ -314,7 +317,7 @@ int cg_create(const char *cgroup)
int cg_wait_for_proc_count(const char *cgroup, int count)
{
- char buf[10 * PAGE_SIZE] = {0};
+ char buf[10 * BUF_SIZE] = {0};
int attempts;
char *ptr;
@@ -339,7 +342,7 @@ int cg_wait_for_proc_count(const char *cgroup, int count)
int cg_killall(const char *cgroup)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
char *ptr = buf;
/* If cgroup.kill exists use it. */
@@ -549,7 +552,7 @@ int cg_run_nowait(const char *cgroup,
int proc_mount_contains(const char *option)
{
- char buf[4 * PAGE_SIZE];
+ char buf[4 * BUF_SIZE];
ssize_t read;
read = read_text("/proc/mounts", buf, sizeof(buf));
@@ -561,7 +564,7 @@ int proc_mount_contains(const char *option)
int cgroup_feature(const char *feature)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
ssize_t read;
read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
@@ -588,7 +591,7 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
return -1;
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 567b1082974c..febc1723d090 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -2,8 +2,8 @@
#include <stdbool.h>
#include <stdlib.h>
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
+#ifndef BUF_SIZE
+#define BUF_SIZE 4096
#endif
#define MB(x) (x << 20)
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 7b83c7e7c9d4..88ca832d4fc1 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -87,7 +87,7 @@ static int test_cgcore_destroy(const char *root)
int ret = KSFT_FAIL;
char *cg_test = NULL;
int child_pid;
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
cg_test = cg_name(root, "cg_test");
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index c83f05438d7c..7a40d76b9548 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -278,7 +278,7 @@ static int test_cpucg_nice(const char *root)
char buf[64];
snprintf(buf, sizeof(buf), "%d", getpid());
if (cg_write(cpucg, "cgroup.procs", buf))
- goto cleanup;
+ exit(EXIT_FAILURE);
/* Try to keep niced CPU usage as constrained to hog_cpu as possible */
nice(1);
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 683b05062810..0d41aa0d343d 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -627,7 +627,7 @@ set_ctrl_state_noerr()
online_cpus()
{
- [[ -n "OFFLINE_CPUS" ]] && {
+ [[ -n "$OFFLINE_CPUS" ]] && {
for C in $OFFLINE_CPUS
do
write_cpu_online ${C}=1
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 97fae92c8387..0569e93fa6b0 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -642,7 +642,7 @@ cleanup:
*/
static int proc_check_stopped(int pid)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
int len;
len = proc_read_text(pid, 0, "stat", buf, sizeof(buf));
@@ -1353,7 +1353,7 @@ static int test_cgfreezer_time_child(const char *root)
}
if (ctime <= ptime) {
- debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime);
+ debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime);
goto cleanup;
}
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
index f451aa449be6..b627d84358b1 100644
--- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -217,6 +217,14 @@ int main(int argc, char **argv)
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) {
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+ }
+
switch (test_hugetlb_memcg(root)) {
case KSFT_PASS:
ksft_test_result_pass("test_hugetlb_memcg\n");
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 12f59925500b..1db0ba1226b9 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -24,7 +24,7 @@
* the maximum discrepancy between charge and vmstat entries is number
* of cpus multiplied by 64 pages.
*/
-#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
+#define MAX_VMSTAT_ERROR (sysconf(_SC_PAGESIZE) * 64 * get_nprocs())
#define KMEM_DEAD_WAIT_RETRIES 80
@@ -353,7 +353,7 @@ static int test_percpu_basic(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
- long current, percpu;
+ long current, percpu, slab;
int i;
parent = cg_name(root, "percpu_basic_test");
@@ -383,13 +383,14 @@ static int test_percpu_basic(const char *root)
current = cg_read_long(parent, "memory.current");
percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+ slab = cg_read_key_long(parent, "memory.stat", "slab ");
- if (current > 0 && percpu > 0 && labs(current - percpu) <
- MAX_VMSTAT_ERROR)
+ if (current > 0 && percpu > 0 && slab >= 0 &&
+ labs(current - (percpu + slab)) < MAX_VMSTAT_ERROR)
ret = KSFT_PASS;
else
- printf("memory.current %ld\npercpu %ld\n",
- current, percpu);
+ printf("memory.current %ld\npercpu %ld\nslab %ld\ndelta %ld\n",
+ current, percpu, slab, current - (percpu + slab));
cleanup_children:
for (i = 0; i < 1000; i++) {
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index b43da9bc20c4..0ebf796f3cff 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -26,6 +26,7 @@
static bool has_localevents;
static bool has_recursiveprot;
+static int page_size;
int get_temp_fd(void)
{
@@ -34,7 +35,7 @@ int get_temp_fd(void)
int alloc_pagecache(int fd, size_t size)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
struct stat st;
int i;
@@ -55,22 +56,38 @@ cleanup:
return -1;
}
-int alloc_anon(const char *cgroup, void *arg)
+static char *alloc_and_populate_anon(size_t size)
{
- size_t size = (unsigned long)arg;
char *buf, *ptr;
buf = malloc(size);
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
+ return NULL;
+ }
+
+ for (ptr = buf; ptr < buf + size; ptr += page_size)
*ptr = 0;
+ return buf;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf;
+
+ buf = alloc_and_populate_anon(size);
+ if (!buf)
+ return -1;
+
free(buf);
return 0;
}
int is_swap_enabled(void)
{
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
const char delim[] = "\n";
int cnt = 0;
char *line;
@@ -113,7 +130,7 @@ static int test_memcg_subtree_control(const char *root)
{
char *parent, *child, *parent2 = NULL, *child2 = NULL;
int ret = KSFT_FAIL;
- char buf[PAGE_SIZE];
+ char buf[BUF_SIZE];
/* Create two nested cgroups with the memory controller enabled */
parent = cg_name(root, "memcg_test_0");
@@ -174,18 +191,13 @@ cleanup_free:
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
size_t size = MB(50);
- char *buf, *ptr;
+ char *buf;
long anon, current;
int ret = -1;
- buf = malloc(size);
- if (buf == NULL) {
- fprintf(stderr, "malloc() failed\n");
+ buf = alloc_and_populate_anon(size);
+ if (!buf)
return -1;
- }
-
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
current = cg_read_long(cgroup, "memory.current");
if (current < size)
@@ -406,16 +418,11 @@ static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
size_t size = (unsigned long)arg;
- char *buf, *ptr;
+ char *buf;
- buf = malloc(size);
- if (buf == NULL) {
- fprintf(stderr, "malloc() failed\n");
+ buf = alloc_and_populate_anon(size);
+ if (!buf)
return -1;
- }
-
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
while (getppid() == ppid)
sleep(1);
@@ -990,18 +997,13 @@ static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
size_t size = MB(50);
- char *buf, *ptr;
+ char *buf;
long mem_current, swap_current;
int ret = -1;
- buf = malloc(size);
- if (buf == NULL) {
- fprintf(stderr, "malloc() failed\n");
+ buf = alloc_and_populate_anon(size);
+ if (!buf)
return -1;
- }
-
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
mem_current = cg_read_long(cgroup, "memory.current");
if (!mem_current || !values_close(mem_current, mem_max, 3))
@@ -1791,6 +1793,10 @@ int main(int argc, char **argv)
char root[PATH_MAX];
int i, proc_status;
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if (page_size <= 0)
+ page_size = BUF_SIZE;
+
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index a7bdcdd09d62..49b36ee79160 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -11,10 +11,16 @@
#include <string.h>
#include <sys/wait.h>
#include <sys/mman.h>
+#include <sys/random.h>
#include "kselftest.h"
#include "cgroup_util.h"
+static int page_size;
+
+#define PATH_ZSWAP "/sys/module/zswap"
+#define PATH_ZSWAP_ENABLED "/sys/module/zswap/parameters/enabled"
+
static int read_int(const char *path, size_t *value)
{
FILE *file;
@@ -70,11 +76,11 @@ static int allocate_and_read_bytes(const char *cgroup, void *arg)
if (!mem)
return -1;
- for (int i = 0; i < size; i += 4095)
+ for (int i = 0; i < size; i += page_size)
mem[i] = 'a';
/* Go through the allocated memory to (z)swap in and out pages */
- for (int i = 0; i < size; i += 4095) {
+ for (int i = 0; i < size; i += page_size) {
if (mem[i] != 'a')
ret = -1;
}
@@ -90,7 +96,7 @@ static int allocate_bytes(const char *cgroup, void *arg)
if (!mem)
return -1;
- for (int i = 0; i < size; i += 4095)
+ for (int i = 0; i < size; i += page_size)
mem[i] = 'a';
free(mem);
return 0;
@@ -115,6 +121,27 @@ fail:
}
/*
+ * Writeback is asynchronous; poll until at least one writeback has
+ * been recorded for @cg, or until @timeout_ms has elapsed.
+ */
+static long wait_for_writeback(const char *cg, int timeout_ms)
+{
+ long elapsed, count;
+ for (elapsed = 0; elapsed < timeout_ms; elapsed += 100) {
+ count = get_cg_wb_count(cg);
+
+ if (count < 0)
+ return -1;
+ if (count > 0)
+ return count;
+
+ usleep(100000);
+ }
+
+ return 0;
+}
+
+/*
* Sanity test to check that pages are written into zswap.
*/
static int test_zswap_usage(const char *root)
@@ -162,21 +189,25 @@ out:
static int test_swapin_nozswap(const char *root)
{
int ret = KSFT_FAIL;
- char *test_group;
- long swap_peak, zswpout;
+ char *test_group, mem_max_buf[32];
+ long swap_peak, zswpout, min_swap;
+ size_t allocation_size = page_size * 512;
+
+ min_swap = allocation_size / 4;
+ snprintf(mem_max_buf, sizeof(mem_max_buf), "%zu", allocation_size * 3/4);
test_group = cg_name(root, "no_zswap_test");
if (!test_group)
goto out;
if (cg_create(test_group))
goto out;
- if (cg_write(test_group, "memory.max", "8M"))
+ if (cg_write(test_group, "memory.max", mem_max_buf))
goto out;
if (cg_write(test_group, "memory.zswap.max", "0"))
goto out;
/* Allocate and read more than memory.max to trigger swapin */
- if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+ if (cg_run(test_group, allocate_and_read_bytes, (void *)allocation_size))
goto out;
/* Verify that pages are swapped out, but no zswap happened */
@@ -186,8 +217,9 @@ static int test_swapin_nozswap(const char *root)
goto out;
}
- if (swap_peak < MB(24)) {
- ksft_print_msg("at least 24MB of memory should be swapped out\n");
+ if (swap_peak < min_swap) {
+ ksft_print_msg("at least %ldKB of memory should be swapped out\n",
+ min_swap / 1024);
goto out;
}
@@ -237,7 +269,7 @@ static int test_zswapin(const char *root)
goto out;
}
- if (zswpin < MB(24) / PAGE_SIZE) {
+ if (zswpin < MB(24) / page_size) {
ksft_print_msg("at least 24MB should be brought back from zswap\n");
goto out;
}
@@ -257,16 +289,15 @@ out:
This will move it into zswap.
* 3. Save current zswap usage.
* 4. Move the memory allocated in step 1 back in from zswap.
- * 5. Set zswap.max to half the amount that was recorded in step 3.
+ * 5. Set zswap.max to 1/4 of the amount that was recorded in step 3.
* 6. Attempt to reclaim memory equal to the amount that was allocated,
this will either trigger writeback if it's enabled, or reclamation
will fail if writeback is disabled as there isn't enough zswap space.
*/
static int attempt_writeback(const char *cgroup, void *arg)
{
- long pagesize = sysconf(_SC_PAGESIZE);
- size_t memsize = MB(4);
- char buf[pagesize];
+ size_t memsize = page_size * 1024;
+ char buf[page_size];
long zswap_usage;
bool wb_enabled = *(bool *) arg;
int ret = -1;
@@ -281,11 +312,11 @@ static int attempt_writeback(const char *cgroup, void *arg)
* half empty, this will result in data that is still compressible
* and ends up in zswap, with material zswap usage.
*/
- for (int i = 0; i < pagesize; i++)
- buf[i] = i < pagesize/2 ? (char) i : 0;
+ for (int i = 0; i < page_size; i++)
+ buf[i] = i < page_size/2 ? (char) i : 0;
- for (int i = 0; i < memsize; i += pagesize)
- memcpy(&mem[i], buf, pagesize);
+ for (int i = 0; i < memsize; i += page_size)
+ memcpy(&mem[i], buf, page_size);
/* Try and reclaim allocated memory */
if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) {
@@ -296,19 +327,19 @@ static int attempt_writeback(const char *cgroup, void *arg)
zswap_usage = cg_read_long(cgroup, "memory.zswap.current");
/* zswpin */
- for (int i = 0; i < memsize; i += pagesize) {
- if (memcmp(&mem[i], buf, pagesize)) {
+ for (int i = 0; i < memsize; i += page_size) {
+ if (memcmp(&mem[i], buf, page_size)) {
ksft_print_msg("invalid memory\n");
goto out;
}
}
- if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/2))
+ if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/4))
goto out;
/*
* If writeback is enabled, trying to reclaim memory now will trigger a
- * writeback as zswap.max is half of what was needed when reclaim ran the first time.
+ * writeback as zswap.max is 1/4 of what was needed when reclaim ran the first time.
* If writeback is disabled, memory reclaim will fail as zswap is limited and
* it can't writeback to swap.
*/
@@ -335,7 +366,10 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb)
return -1;
/* Verify that zswap writeback occurred only if writeback was enabled */
- zswpwb_after = get_cg_wb_count(cgroup);
+ if (wb)
+ zswpwb_after = wait_for_writeback(cgroup, 5000);
+ else
+ zswpwb_after = get_cg_wb_count(cgroup);
if (zswpwb_after < 0)
return -1;
@@ -417,44 +451,71 @@ static int test_zswap_writeback_disabled(const char *root)
static int test_no_invasive_cgroup_shrink(const char *root)
{
int ret = KSFT_FAIL;
- size_t control_allocation_size = MB(10);
- char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
+ unsigned int off;
+ size_t allocation_size = page_size * 1024;
+ unsigned int nr_pages = allocation_size / page_size;
+ char zswap_max_buf[32], mem_max_buf[32];
+ char *zw_allocation = NULL, *wb_allocation = NULL;
+ char *zw_group = NULL, *wb_group = NULL;
+
+ snprintf(zswap_max_buf, sizeof(zswap_max_buf), "%d", page_size);
+ snprintf(mem_max_buf, sizeof(mem_max_buf), "%zu", allocation_size / 2);
wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
if (!wb_group)
return KSFT_FAIL;
- if (cg_write(wb_group, "memory.zswap.max", "10K"))
+ if (cg_write(wb_group, "memory.zswap.max", zswap_max_buf))
+ goto out;
+ if (cg_write(wb_group, "memory.max", mem_max_buf))
+ goto out;
+
+ zw_group = setup_test_group_1M(root, "per_memcg_wb_test2");
+ if (!zw_group)
goto out;
- control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
- if (!control_group)
+ if (cg_write(zw_group, "memory.max", mem_max_buf))
goto out;
- /* Push some test_group2 memory into zswap */
- if (cg_enter_current(control_group))
+ /* Push some zw_group memory into zswap (simple data, easy to compress) */
+ if (cg_enter_current(zw_group))
goto out;
- control_allocation = malloc(control_allocation_size);
- for (int i = 0; i < control_allocation_size; i += 4095)
- control_allocation[i] = 'a';
- if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
+ zw_allocation = malloc(allocation_size);
+ for (int i = 0; i < nr_pages; i++) {
+ off = (unsigned long)i * page_size;
+ memset(&zw_allocation[off], 0, page_size);
+ memset(&zw_allocation[off], 'a', page_size/4);
+ }
+ if (cg_read_key_long(zw_group, "memory.stat", "zswapped") < 1)
goto out;
- /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
- if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
+ /* Push wb_group memory into zswap with hard-to-compress data to trigger wb */
+ if (cg_enter_current(wb_group))
goto out;
+ wb_allocation = malloc(allocation_size);
+ if (!wb_allocation)
+ goto out;
+ for (int i = 0; i < nr_pages; i++) {
+ off = (unsigned long)i * page_size;
+ memset(&wb_allocation[off], 0, page_size);
+ getrandom(&wb_allocation[off], page_size/4, 0);
+ }
/* Verify that only zswapped memory from gwb_group has been written back */
- if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
+ if (wait_for_writeback(wb_group, 5000) > 0 && get_cg_wb_count(zw_group) == 0)
ret = KSFT_PASS;
out:
cg_enter_current(root);
- if (control_group) {
- cg_destroy(control_group);
- free(control_group);
+ if (zw_group) {
+ cg_destroy(zw_group);
+ free(zw_group);
}
- cg_destroy(wb_group);
- free(wb_group);
- if (control_allocation)
- free(control_allocation);
+ if (wb_group) {
+ cg_destroy(wb_group);
+ free(wb_group);
+ }
+ if (zw_allocation)
+ free(zw_allocation);
+ if (wb_allocation)
+ free(wb_allocation);
return ret;
}
@@ -473,7 +534,7 @@ static int no_kmem_bypass_child(const char *cgroup, void *arg)
values->child_allocated = true;
return -1;
}
- for (long i = 0; i < values->target_alloc_bytes; i += 4095)
+ for (long i = 0; i < values->target_alloc_bytes; i += page_size)
((char *)allocation)[i] = 'a';
values->child_allocated = true;
pause();
@@ -521,7 +582,7 @@ static int test_no_kmem_bypass(const char *root)
min_free_kb_low = sys_info.totalram / 500000;
values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
sys_info.totalram * 5 / 100;
- stored_pages_threshold = sys_info.totalram / 5 / 4096;
+ stored_pages_threshold = sys_info.totalram / 5 / page_size;
trigger_allocation_size = sys_info.totalram / 20;
/* Set up test memcg */
@@ -548,7 +609,7 @@ static int test_no_kmem_bypass(const char *root)
if (!trigger_allocation)
break;
- for (int i = 0; i < trigger_allocation_size; i += 4095)
+ for (int i = 0; i < trigger_allocation_size; i += page_size)
trigger_allocation[i] = 'b';
usleep(100000);
free(trigger_allocation);
@@ -559,8 +620,8 @@ static int test_no_kmem_bypass(const char *root)
/* If memory was pushed to zswap, verify it belongs to memcg */
if (stored_pages > stored_pages_threshold) {
int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
- int delta = stored_pages * 4096 - zswapped;
- int result_ok = delta < stored_pages * 4096 / 4;
+ int delta = stored_pages * page_size - zswapped;
+ int result_ok = delta < stored_pages * page_size / 4;
ret = result_ok ? KSFT_PASS : KSFT_FAIL;
break;
@@ -614,7 +675,7 @@ static int allocate_random_and_wait(const char *cgroup, void *arg)
close(fd);
/* Touch all pages to ensure they're faulted in */
- for (size_t i = 0; i < size; i += PAGE_SIZE)
+ for (size_t i = 0; i < size; i += page_size)
mem[i] = mem[i];
/* Use MADV_PAGEOUT to push pages into zswap */
@@ -725,9 +786,18 @@ struct zswap_test {
};
#undef T
-static bool zswap_configured(void)
+static void check_zswap_enabled(void)
{
- return access("/sys/module/zswap", F_OK) == 0;
+ char value[2];
+
+ if (access(PATH_ZSWAP, F_OK))
+ ksft_exit_skip("zswap isn't configured\n");
+
+ if (read_text(PATH_ZSWAP_ENABLED, value, sizeof(value)) <= 0)
+ ksft_exit_fail_msg("Failed to read " PATH_ZSWAP_ENABLED "\n");
+
+ if (value[0] == 'N')
+ ksft_exit_skip("zswap is disabled (hint: echo 1 > " PATH_ZSWAP_ENABLED ")\n");
}
int main(int argc, char **argv)
@@ -735,13 +805,16 @@ int main(int argc, char **argv)
char root[PATH_MAX];
int i;
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if (page_size <= 0)
+ page_size = BUF_SIZE;
+
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
- if (!zswap_configured())
- ksft_exit_skip("zswap isn't configured\n");
+ check_zswap_enabled();
/*
* Check that memory controller is available:
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index 2b4df655d9fd..8b12cc048440 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -132,14 +132,17 @@ class DamosQuota:
goals = None # quota goals
goal_tuner = None # quota goal tuner
reset_interval_ms = None # quota reset interval
+ fail_charge_num = None
+ fail_charge_denom = None
weight_sz_permil = None
weight_nr_accesses_permil = None
weight_age_permil = None
scheme = None # owner scheme
def __init__(self, sz=0, ms=0, goals=None, goal_tuner='consist',
- reset_interval_ms=0, weight_sz_permil=0,
- weight_nr_accesses_permil=0, weight_age_permil=0):
+ reset_interval_ms=0, fail_charge_num=0, fail_charge_denom=0,
+ weight_sz_permil=0, weight_nr_accesses_permil=0,
+ weight_age_permil=0):
self.sz = sz
self.ms = ms
self.reset_interval_ms = reset_interval_ms
@@ -151,6 +154,8 @@ class DamosQuota:
for idx, goal in enumerate(self.goals):
goal.idx = idx
goal.quota = self
+ self.fail_charge_num = fail_charge_num
+ self.fail_charge_denom = fail_charge_denom
def sysfs_dir(self):
return os.path.join(self.scheme.sysfs_dir(), 'quotas')
@@ -197,6 +202,18 @@ class DamosQuota:
os.path.join(self.sysfs_dir(), 'goal_tuner'), self.goal_tuner)
if err is not None:
return err
+
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'fail_charge_num'),
+ self.fail_charge_num)
+ if err is not None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'fail_charge_denom'),
+ self.fail_charge_denom)
+ if err is not None:
+ return err
+
return None
class DamosWatermarks:
@@ -604,10 +621,11 @@ class DamonCtx:
targets = None
schemes = None
kdamond = None
+ pause = None
idx = None
def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[],
- schemes=[]):
+ schemes=[], pause=False):
self.ops = ops
self.monitoring_attrs = monitoring_attrs
self.monitoring_attrs.context = self
@@ -622,6 +640,8 @@ class DamonCtx:
scheme.idx = idx
scheme.context = self
+ self.pause=pause
+
def sysfs_dir(self):
return os.path.join(self.kdamond.sysfs_dir(), 'contexts',
'%d' % self.idx)
@@ -662,6 +682,11 @@ class DamonCtx:
err = scheme.stage()
if err is not None:
return err
+
+ err = write_file(os.path.join(self.sysfs_dir(), 'pause'), self.pause)
+ if err is not None:
+ return err
+
return None
class Kdamond:
diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py
index af99b07a4f56..972948e6215f 100755
--- a/tools/testing/selftests/damon/drgn_dump_damon_status.py
+++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py
@@ -112,6 +112,8 @@ def damos_quota_to_dict(quota):
['goals', damos_quota_goals_to_list],
['goal_tuner', int],
['esz', int],
+ ['fail_charge_num', int],
+ ['fail_charge_denom', int],
['weight_sz', int],
['weight_nr_accesses', int],
['weight_age', int],
@@ -200,6 +202,7 @@ def damon_ctx_to_dict(ctx):
['attrs', attrs_to_dict],
['adaptive_targets', targets_to_list],
['schemes', schemes_to_list],
+ ['pause', bool],
])
def main():
diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
index 3aa5c91548a5..aa03a1187489 100755
--- a/tools/testing/selftests/damon/sysfs.py
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -24,9 +24,12 @@ def dump_damon_status_dict(pid):
except Exception as e:
return None, 'json.load fail (%s)' % e
+kdamonds = None
def fail(expectation, status):
print('unexpected %s' % expectation)
print(json.dumps(status, indent=4))
+ if kdamonds is not None:
+ kdamonds.stop()
exit(1)
def assert_true(condition, expectation, status):
@@ -73,6 +76,10 @@ def assert_quota_committed(quota, dump):
}
assert_true(dump['goal_tuner'] == tuner_val[quota.goal_tuner],
'goal_tuner', dump)
+ assert_true(dump['fail_charge_num'] == quota.fail_charge_num,
+ 'fail_charge_num', dump)
+ assert_true(dump['fail_charge_denom'] == quota.fail_charge_denom,
+ 'fail_charge_denom', dump)
assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump)
assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil,
'weight_nr_accesses', dump)
@@ -123,11 +130,12 @@ def assert_scheme_committed(scheme, dump):
'pageout': 2,
'hugepage': 3,
'nohugeapge': 4,
- 'lru_prio': 5,
- 'lru_deprio': 6,
- 'migrate_hot': 7,
- 'migrate_cold': 8,
- 'stat': 9,
+ 'collapse': 5,
+ 'lru_prio': 6,
+ 'lru_deprio': 7,
+ 'migrate_hot': 8,
+ 'migrate_cold': 9,
+ 'stat': 10,
}
assert_true(dump['action'] == action_val[scheme.action], 'action', dump)
assert_true(dump['apply_interval_us'] == scheme. apply_interval_us,
@@ -190,21 +198,60 @@ def assert_ctx_committed(ctx, dump):
assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs'])
assert_monitoring_targets_committed(ctx.targets, dump['adaptive_targets'])
assert_schemes_committed(ctx.schemes, dump['schemes'])
+ assert_true(dump['pause'] == ctx.pause, 'pause', dump)
def assert_ctxs_committed(kdamonds):
+ ctxs_paused_for_dump = []
+ kdamonds_paused_for_dump = []
+ # pause for safe state dumping
+ for kd in kdamonds.kdamonds:
+ for ctx in kd.contexts:
+ if ctx.pause is False:
+ ctx.pause = True
+ ctxs_paused_for_dump.append(ctx)
+ if not kd in kdamonds_paused_for_dump:
+ kdamonds_paused_for_dump.append(kd)
+ if kd in kdamonds_paused_for_dump:
+ err = kd.commit()
+ if err is not None:
+ print('pause fail (%s)' % err)
+ kdamonds.stop()
+ exit(1)
+
status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
if err is not None:
print(err)
kdamonds.stop()
exit(1)
+ # resume contexts paused for safe state dumping
+ for ctx in ctxs_paused_for_dump:
+ ctx.pause = False
+ for kd in kdamonds_paused_for_dump:
+ err = kd.commit()
+ if err is not None:
+ print('resume fail (%s)' % err)
+ kdamonds.stop()
+ exit(1)
+
+ # restore for comparison
+ for ctx in ctxs_paused_for_dump:
+ ctx.pause = True
+
ctxs = kdamonds.kdamonds[0].contexts
dump = status['contexts']
assert_true(len(ctxs) == len(dump), 'ctxs length', dump)
for idx, ctx in enumerate(ctxs):
assert_ctx_committed(ctx, dump[idx])
+ # restore for the caller
+ for kd in kdamonds.kdamonds:
+ for ctx in kd.contexts:
+ if ctx in ctxs_paused_for_dump:
+ ctx.pause = False
+
def main():
+ global kdamonds
kdamonds = _damon_sysfs.Kdamonds(
[_damon_sysfs.Kdamond(
contexts=[_damon_sysfs.DamonCtx(
@@ -239,6 +286,8 @@ def main():
nid=1)],
goal_tuner='temporal',
reset_interval_ms=1500,
+ fail_charge_num=1,
+ fail_charge_denom=4096,
weight_sz_permil=20,
weight_nr_accesses_permil=200,
weight_age_permil=1000),
@@ -301,6 +350,7 @@ def main():
print('kdamond start failed: %s' % err)
exit(1)
kdamonds.kdamonds[0].contexts[0].targets[1].obsolete = True
+ kdamonds.kdamonds[0].contexts[0].pause = True
kdamonds.kdamonds[0].commit()
del kdamonds.kdamonds[0].contexts[0].targets[1]
assert_ctxs_committed(kdamonds)
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 83e3b7f63d81..78f4badb5beb 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -282,6 +282,17 @@ test_targets()
ensure_dir "$targets_dir/1" "not_exist"
}
+
+test_intervals_goal()
+{
+ goal_dir=$1
+ ensure_dir "$goal_dir" "exist"
+ ensure_file "$goal_dir/access_bp" "exist" "600"
+ ensure_file "$goal_dir/aggrs" "exist" "600"
+ ensure_file "$goal_dir/min_sample_us" "exist" "600"
+ ensure_file "$goal_dir/max_sample_us" "exist" "600"
+}
+
test_intervals()
{
intervals_dir=$1
@@ -289,6 +300,54 @@ test_intervals()
ensure_file "$intervals_dir/aggr_us" "exist" "600"
ensure_file "$intervals_dir/sample_us" "exist" "600"
ensure_file "$intervals_dir/update_us" "exist" "600"
+ test_intervals_goal "$intervals_dir/intervals_goal"
+}
+
+test_damon_filter()
+{
+ damon_filter_dir=$1
+ ensure_file "$damon_filter_dir/type" "exist" "600"
+ ensure_write_succ "$damon_filter_dir/type" "anon" "valid input"
+ ensure_write_fail "$damon_filter_dir/type" "foo" "invalid input"
+ ensure_file "$damon_filter_dir/matching" "exist" "600"
+ ensure_file "$damon_filter_dir/allow" "exist" "600"
+}
+
+test_damon_filters()
+{
+ filters_dir=$1
+ ensure_dir "$filters_dir" "exist"
+ ensure_file "$filters_dir/nr_filters" "exist" "600"
+ ensure_write_succ "$filters_dir/nr_filters" "1" "valid input"
+ test_damon_filter "$filters_dir/0"
+
+ ensure_write_succ "$filters_dir/nr_filters" "2" "valid input"
+ test_damon_filter "$filters_dir/0"
+ test_damon_filter "$filters_dir/1"
+
+ ensure_write_succ "$filters_dir/nr_filters" "0" "valid input"
+ ensure_dir "$filters_dir/0" "not_exist"
+ ensure_dir "$filters_dir/1" "not_exist"
+}
+
+test_probe()
+{
+ probe_dir=$1
+ ensure_dir "$probe_dir" "exist"
+ test_damon_filters "$probe_dir/filters"
+}
+
+test_probes()
+{
+ probes_dir=$1
+ ensure_dir "$probes_dir" "exist"
+ ensure_file "$probes_dir/nr_probes" "exist" "600"
+
+ ensure_write_succ "$probes_dir/nr_probes" "1" "valid input"
+ test_probe "$probes_dir/0"
+
+ ensure_write_succ "$probes_dir/nr_probes" "0" "valid input"
+ ensure_dir "$probes_dir/0" "not_exist"
}
test_monitoring_attrs()
@@ -296,6 +355,7 @@ test_monitoring_attrs()
monitoring_attrs_dir=$1
ensure_dir "$monitoring_attrs_dir" "exist"
test_intervals "$monitoring_attrs_dir/intervals"
+ test_probes "$monitoring_attrs_dir/probes"
test_range "$monitoring_attrs_dir/nr_regions"
}
@@ -305,6 +365,8 @@ test_context()
ensure_dir "$context_dir" "exist"
ensure_file "$context_dir/avail_operations" "exit" 400
ensure_file "$context_dir/operations" "exist" 600
+ ensure_file "$context_dir/addr_unit" "exist" 600
+ ensure_file "$context_dir/pause" "exist" 600
test_monitoring_attrs "$context_dir/monitoring_attrs"
test_targets "$context_dir/targets"
test_schemes "$context_dir/schemes"
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index be130bf585a4..6364ca02642d 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -13,6 +13,7 @@ TEST_PROGS := \
bond_options.sh \
bond_passive_lacp.sh \
bond_stacked_header_parse.sh \
+ bond_vlan_real_dev.sh \
dev_addr_lists.sh \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_vlan_real_dev.sh b/tools/testing/selftests/drivers/net/bonding/bond_vlan_real_dev.sh
new file mode 100755
index 000000000000..542d9ffc4819
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_vlan_real_dev.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test propagation of a real device's state to the VLANs stacked on top of it
+# when the real device is (or becomes) a bond member.
+#
+# The kernel mirrors a real device's UP/DOWN, MTU and feature changes onto its
+# VLANs. This is done asynchronously (netdev_work): doing it synchronously from
+# the real device's notifier could deadlock. If the real device is brought up
+# while enslaved to a bond - so its instance lock is held across NETDEV_UP - and
+# a VLAN on top of it is itself a bond member, the synchronous propagation
+# re-entered the stack and tried to take the same instance lock again.
+#
+# Cover both halves:
+# - the deferred UP/DOWN, MTU and feature propagation actually lands on the
+# VLAN (link state and MTU use an ops-locked dummy, i.e. the deferral path),
+# - the deadlock-prone topology - a VLAN on a dummy, with the VLAN and the
+# dummy each enslaved to a different bond - can be built without hanging.
+
+ALL_TESTS="
+ vlan_link_state
+ vlan_mtu
+ vlan_features
+ vlan_real_dev_enslave
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+# Return 0 if $dev in netns $ns has flag $flag set (e.g. UP) in its <...> flags.
+link_has_flag()
+{
+ local ns=$1 dev=$2 flag=$3
+
+ ip -n "$ns" link show dev "$dev" 2>/dev/null | grep -q "[<,]${flag}[,>]"
+}
+
+link_lacks_flag()
+{
+ ! link_has_flag "$@"
+}
+
+link_mtu_is()
+{
+ local ns=$1 dev=$2 want=$3 cur
+
+ cur=$(ip -n "$ns" link show dev "$dev" 2>/dev/null | \
+ sed -n 's/.* mtu \([0-9]\+\).*/\1/p')
+ [ "$cur" = "$want" ]
+}
+
+vlan_feature_is()
+{
+ local ns=$1 dev=$2 feature=$3 value=$4
+
+ ip netns exec "$ns" ethtool -k "$dev" 2>/dev/null | \
+ grep -q "^$feature: $value"
+}
+
+link_has_master()
+{
+ local ns=$1 dev=$2 master=$3
+
+ ip -n "$ns" -o link show dev "$dev" 2>/dev/null | grep -q "master $master"
+}
+
+vlan_link_state()
+{
+ RET=0
+
+ ip -n "$NS" link add ls_dummy type dummy
+ ip -n "$NS" link add link ls_dummy name ls_vlan type vlan id 100
+
+ # Bringing the real device up must propagate UP to the VLAN.
+ ip -n "$NS" link set ls_dummy up
+ busywait "$BUSYWAIT_TIMEOUT" link_has_flag "$NS" ls_vlan UP
+ check_err $? "VLAN did not go UP after the real device went UP"
+
+ # ... and likewise for DOWN.
+ ip -n "$NS" link set ls_dummy down
+ busywait "$BUSYWAIT_TIMEOUT" link_lacks_flag "$NS" ls_vlan UP
+ check_err $? "VLAN did not go DOWN after the real device went DOWN"
+
+ ip -n "$NS" link del ls_vlan
+ ip -n "$NS" link del ls_dummy
+
+ log_test "VLAN link state follows the real device"
+}
+
+vlan_mtu()
+{
+ RET=0
+
+ # The VLAN inherits the real device's MTU (2000) at creation time.
+ ip -n "$NS" link add mtu_dummy mtu 2000 type dummy
+ ip -n "$NS" link add link mtu_dummy name mtu_vlan type vlan id 100
+
+ # Shrinking the real device's MTU must clamp the VLAN's MTU.
+ ip -n "$NS" link set mtu_dummy mtu 1500
+ busywait "$BUSYWAIT_TIMEOUT" link_mtu_is "$NS" mtu_vlan 1500
+ check_err $? "VLAN MTU not clamped after the real device's MTU shrank"
+
+ ip -n "$NS" link del mtu_vlan
+ ip -n "$NS" link del mtu_dummy
+
+ log_test "VLAN MTU clamped to the real device"
+}
+
+vlan_features()
+{
+ RET=0
+
+ # Use veth as the real device: unlike dummy it exports vlan_features, so
+ # the VLAN actually inherits a toggleable offload to assert on.
+ ip -n "$NS" link add ft_veth type veth peer name ft_veth_pr
+ ip -n "$NS" link add link ft_veth name ft_vlan type vlan id 100
+
+ vlan_feature_is "$NS" ft_vlan scatter-gather on
+ check_err $? "VLAN did not inherit scatter-gather from the real device"
+
+ # Toggling the offload on the real device must propagate to the VLAN.
+ ip netns exec "$NS" ethtool -K ft_veth sg off
+ busywait "$BUSYWAIT_TIMEOUT" \
+ vlan_feature_is "$NS" ft_vlan scatter-gather off
+ check_err $? "VLAN scatter-gather still on after disabling it on real dev"
+
+ ip netns exec "$NS" ethtool -K ft_veth sg on
+ busywait "$BUSYWAIT_TIMEOUT" \
+ vlan_feature_is "$NS" ft_vlan scatter-gather on
+ check_err $? "VLAN scatter-gather still off after enabling it on real dev"
+
+ ip -n "$NS" link del ft_vlan
+ ip -n "$NS" link del ft_veth
+
+ log_test "VLAN features follow the real device"
+}
+
+vlan_real_dev_enslave()
+{
+ RET=0
+
+ # dummy <- VLAN -> bond0, then enslave the dummy itself to bond1. The
+ # last step brings the dummy up under bond1's instance lock, which used
+ # to deadlock while synchronously propagating UP to the (bond-enslaved)
+ # VLAN on top.
+ ip -n "$NS" link add dl_dummy type dummy
+ ip -n "$NS" link set dl_dummy up
+ ip -n "$NS" link add link dl_dummy name dl_vlan type vlan id 100
+
+ ip -n "$NS" link add dl_bond0 type bond mode active-backup
+ ip -n "$NS" link set dl_vlan down
+ ip -n "$NS" link set dl_vlan master dl_bond0
+ check_err $? "could not enslave the VLAN to bond0"
+
+ ip -n "$NS" link add dl_bond1 type bond mode active-backup
+ ip -n "$NS" link set dl_dummy down
+ ip -n "$NS" link set dl_dummy master dl_bond1
+ check_err $? "could not enslave the real device to bond1"
+
+ # If we got here the kernel did not deadlock; make sure it is still
+ # responsive and the enslave really took effect.
+ link_has_master "$NS" dl_dummy dl_bond1
+ check_err $? "real device not enslaved to bond1"
+
+ ip -n "$NS" link del dl_bond1
+ ip -n "$NS" link del dl_bond0
+ ip -n "$NS" link del dl_vlan
+ ip -n "$NS" link del dl_dummy
+
+ log_test "VLAN real device enslaved to a second bond"
+}
+
+setup_ns NS
+trap 'cleanup_ns $NS' EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/so_txtime.c b/tools/testing/selftests/drivers/net/so_txtime.c
index 75f3beef13d9..55a386f3d1b9 100644
--- a/tools/testing/selftests/drivers/net/so_txtime.c
+++ b/tools/testing/selftests/drivers/net/so_txtime.c
@@ -37,7 +37,7 @@
static int cfg_clockid = CLOCK_TAI;
static uint16_t cfg_port = 8000;
-static int cfg_variance_us = 4000;
+static int cfg_variance_us = 8000;
static bool cfg_machine_slow;
static uint64_t cfg_start_time_ns;
static int cfg_mark;
diff --git a/tools/testing/selftests/filelock/.gitignore b/tools/testing/selftests/filelock/.gitignore
new file mode 100644
index 000000000000..825e899a121b
--- /dev/null
+++ b/tools/testing/selftests/filelock/.gitignore
@@ -0,0 +1 @@
+ofdlocks
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
index ff8d47fc373a..68bac28b234b 100644
--- a/tools/testing/selftests/filelock/ofdlocks.c
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -16,7 +16,7 @@ static int lock_set(int fd, struct flock *fl)
fl->l_whence = SEEK_SET;
ret = fcntl(fd, F_OFD_SETLK, fl);
if (ret)
- perror("fcntl()");
+ ksft_perror("fcntl()");
return ret;
}
@@ -28,7 +28,7 @@ static int lock_get(int fd, struct flock *fl)
fl->l_whence = SEEK_SET;
ret = fcntl(fd, F_OFD_GETLK, fl);
if (ret)
- perror("fcntl()");
+ ksft_perror("fcntl()");
return ret;
}
@@ -39,94 +39,82 @@ int main(void)
int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600);
int fd2 = open("/tmp/aa", O_RDONLY);
+ ksft_print_header();
+ ksft_set_plan(4);
+
unlink("/tmp/aa");
assert(fd != -1);
assert(fd2 != -1);
- ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2);
+ ksft_print_msg("opened fds %i %i\n", fd, fd2);
/* Set some read lock */
fl.l_type = F_RDLCK;
fl.l_start = 5;
fl.l_len = 3;
rc = lock_set(fd, &fl);
- if (rc == 0) {
- ksft_print_msg
- ("[SUCCESS] set OFD read lock on first fd\n");
- } else {
- ksft_print_msg("[FAIL] to set OFD read lock on first fd\n");
- return -1;
- }
+ ksft_test_result(rc == 0, "set OFD read lock on first fd\n");
+ if (rc != 0)
+ ksft_finished();
+
/* Make sure read locks do not conflict on different fds. */
fl.l_type = F_RDLCK;
fl.l_start = 5;
fl.l_len = 1;
rc = lock_get(fd2, &fl);
if (rc != 0)
- return -1;
- if (fl.l_type != F_UNLCK) {
- ksft_print_msg("[FAIL] read locks conflicted\n");
- return -1;
- }
+ ksft_finished();
+ if (fl.l_type != F_UNLCK)
+ ksft_exit_fail_msg("read locks conflicted\n");
+
/* Make sure read/write locks do conflict on different fds. */
fl.l_type = F_WRLCK;
fl.l_start = 5;
fl.l_len = 1;
rc = lock_get(fd2, &fl);
if (rc != 0)
- return -1;
- if (fl.l_type != F_UNLCK) {
- ksft_print_msg
- ("[SUCCESS] read and write locks conflicted\n");
- } else {
- ksft_print_msg
- ("[SUCCESS] read and write locks not conflicted\n");
- return -1;
- }
+ ksft_finished();
+ ksft_test_result(fl.l_type != F_UNLCK,
+ "read and write locks conflicted\n");
+ if (fl.l_type == F_UNLCK)
+ ksft_finished();
+
/* Get info about the lock on first fd. */
fl.l_type = F_UNLCK;
fl.l_start = 5;
fl.l_len = 1;
rc = lock_get(fd, &fl);
- if (rc != 0) {
- ksft_print_msg
- ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
- return -1;
- }
- if (fl.l_type != F_UNLCK) {
- ksft_print_msg
- ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
- fl.l_type, fl.l_pid, fl.l_len);
- } else {
- ksft_print_msg
- ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n");
- return -1;
- }
+ if (rc != 0)
+ ksft_exit_fail_msg("F_OFD_GETLK with F_UNLCK not supported\n");
+ ksft_test_result(fl.l_type != F_UNLCK,
+ "F_OFD_GETLK with F_UNLCK returned lock info\n");
+ if (fl.l_type == F_UNLCK)
+ ksft_exit_fail();
+ ksft_print_msg("F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+
/* Try the same but by locking everything by len==0. */
fl2.l_type = F_UNLCK;
fl2.l_start = 0;
fl2.l_len = 0;
rc = lock_get(fd, &fl2);
- if (rc != 0) {
- ksft_print_msg
- ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
- return -1;
- }
+ if (rc != 0)
+ ksft_exit_fail_msg
+ ("F_OFD_GETLK with F_UNLCK not supported\n");
+ ksft_test_result(memcmp(&fl, &fl2, sizeof(fl)) == 0,
+ "F_UNLCK with len==0 returned the same\n");
if (memcmp(&fl, &fl2, sizeof(fl))) {
- ksft_print_msg
- ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ ksft_exit_fail_msg
+ ("F_UNLCK test returns: locked, type %i pid %i len %zi\n",
fl.l_type, fl.l_pid, fl.l_len);
- return -1;
}
- ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n");
+
/* Get info about the lock on second fd - no locks on it. */
fl.l_type = F_UNLCK;
fl.l_start = 0;
fl.l_len = 0;
lock_get(fd2, &fl);
- if (fl.l_type != F_UNLCK) {
- ksft_print_msg
- ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
- return -1;
- }
- return 0;
+ ksft_test_result(fl.l_type == F_UNLCK,
+ "F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
index e71cc3ad0bdf..6d00d3c0f493 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
@@ -6,7 +6,7 @@
original_group=`stat -c "%g" .`
original_owner=`stat -c "%u" .`
-local mount_point=$(get_mount_point)
+mount_point=$(get_mount_point)
mount_options=$(get_mnt_options "$mount_point")
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
index 8e905d4fe6dd..f985ff391463 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
@@ -36,15 +36,23 @@ make_str() {
data=`printf -- 'X%.0s' $(seq $cnt)`
- printf "${val}${data}"
+ # Return escape-sequence text (e.g. "\003\000..."); the caller
+ # converts to binary. Shell command substitution strips NUL bytes,
+ # so the binary form cannot survive being captured into a variable.
+ printf '%s' "${val}${data}"
}
write_buffer() {
id=$1
size=$2
- # write the string into the raw marker
- make_str $id $size > trace_marker_raw
+ str=`make_str $id $size`
+ len=`printf "$str" | wc -c`
+ # Pipe through dd to ensure a single atomic write() syscall
+ # on architectures with 64K pages, where shell's printf builtin
+ # uses stdio buffering which may split the output into multiple
+ # writes.
+ printf "$str" | dd of=trace_marker_raw bs=$len iflag=fullblock
}
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 50ec9e0406ab..96071b4800e8 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -105,13 +105,6 @@ $(MAKE_DIRS):
$(call msg,MKDIR,,$@)
$(Q)mkdir -p $@
-# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
-ifeq ($(SRCARCH),x86)
-LLD := lld
-else
-LLD := ld
-endif
-
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index d1fe5dbc2813..d44b34b05757 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -556,6 +556,21 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
1, &num_inv);
assert(!num_inv);
+ /* Negative test: entry_len is bounded by PAGE_SIZE */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ PAGE_SIZE + 1, &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: entry_num is bounded */
+#define IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX (1U << 19)
+ num_inv = IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX + 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
/* Negative test: invalid flag is passed */
num_inv = 1;
inv_reqs[0].flags = 0xffffffff;
@@ -2980,22 +2995,54 @@ TEST_F(iommufd_viommu, vdevice_alloc)
uint32_t veventq_id;
uint32_t veventq_fd;
int prev_seq = -1;
+ size_t hdr_size = sizeof(struct iommufd_vevent_header);
+ char vbuf[64];
if (dev_id) {
/* Must allocate vdevice before attaching to a nested hwpt */
test_err_mock_domain_replace(ENOENT, self->stdev_id,
self->nested_hwpt_id);
+ /* Test depth lower and upper bounds (mirrors kernel cap) */
+#define VEVENTQ_MAX_DEPTH (1U << 19)
+ test_err_veventq_alloc(EINVAL, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, 0, NULL,
+ NULL);
+ test_err_veventq_alloc(EINVAL, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST,
+ VEVENTQ_MAX_DEPTH + 1, NULL, NULL);
+ test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
+ VEVENTQ_MAX_DEPTH, &veventq_id,
+ &veventq_fd);
+ close(veventq_fd);
+ test_ioctl_destroy(veventq_id);
+
/* Allocate a vEVENTQ with veventq_depth=2 */
test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
- &veventq_id, &veventq_fd);
+ 2, &veventq_id, &veventq_fd);
test_err_veventq_alloc(EEXIST, viommu_id,
- IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL);
+ IOMMU_VEVENTQ_TYPE_SELFTEST, 2, NULL,
+ NULL);
+
+ /* Invalid read counts on an empty vEVENTQ */
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, hdr_size - 1));
+ ASSERT_EQ(EINVAL, errno);
+
/* Set vdev_id to 0x99, unset it, and set to 0x88 */
test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
test_cmd_mock_domain_replace(self->stdev_id,
self->nested_hwpt_id);
test_cmd_trigger_vevents(dev_id, 1);
+
+ /* Invalid read counts on a non-empty vEVENTQ */
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, 0));
+ ASSERT_EQ(EINVAL, errno);
+ /* header fits but the event's payload doesn't */
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, hdr_size));
+ ASSERT_EQ(EINVAL, errno);
+
test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq);
test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99,
&vdev_id);
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index 45c14323a618..25495d8dceb3 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -712,7 +712,7 @@ TEST_FAIL_NTH(basic_fail_nth, device)
return -1;
if (_test_cmd_veventq_alloc(self->fd, viommu_id,
- IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, 2, &veventq_id,
&veventq_fd))
return -1;
close(veventq_fd);
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 5502751d500c..b4928cbd4d9c 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -1060,12 +1060,13 @@ static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type,
base_addr, len, out_qid))
static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
- __u32 *veventq_id, __u32 *veventq_fd)
+ __u32 depth, __u32 *veventq_id,
+ __u32 *veventq_fd)
{
struct iommu_veventq_alloc cmd = {
.size = sizeof(cmd),
.type = type,
- .veventq_depth = 2,
+ .veventq_depth = depth,
.viommu_id = viommu_id,
};
int ret;
@@ -1080,13 +1081,13 @@ static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
return 0;
}
-#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \
- ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+#define test_cmd_veventq_alloc(viommu_id, type, depth, veventq_id, veventq_fd) \
+ ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, depth, \
veventq_id, veventq_fd))
-#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \
- veventq_fd) \
- EXPECT_ERRNO(_errno, \
- _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+#define test_err_veventq_alloc(_errno, viommu_id, type, depth, veventq_id, \
+ veventq_fd) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_veventq_alloc(self->fd, viommu_id, type, depth, \
veventq_id, veventq_fd))
static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)
diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh
index 49fdac8e8b15..0014bd76e88d 100755
--- a/tools/testing/selftests/kho/vmtest.sh
+++ b/tools/testing/selftests/kho/vmtest.sh
@@ -59,10 +59,14 @@ function build_kernel() {
tee "$kconfig" > "$kho_config" <<EOF
CONFIG_BLK_DEV_INITRD=y
CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_DEBUG=y
CONFIG_KEXEC_HANDOVER_DEBUGFS=y
CONFIG_TEST_KEXEC_HANDOVER=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_SMP=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
$arch_kconfig
EOF
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 9118a5a51b89..d28a057fa6c2 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -97,6 +97,7 @@ TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
TEST_GEN_PROGS_x86 += x86/nested_set_state_test
+TEST_GEN_PROGS_x86 += x86/nested_tdp_fault_test
TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test
@@ -117,6 +118,7 @@ TEST_GEN_PROGS_x86 += x86/svm_nested_clear_efer_svme
TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
TEST_GEN_PROGS_x86 += x86/svm_nested_vmcb12_gpa
+TEST_GEN_PROGS_x86 += x86/svm_nested_pat_test
TEST_GEN_PROGS_x86 += x86/svm_lbr_nested_state
TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
TEST_GEN_PROGS_x86 += x86/sync_regs_test
@@ -140,6 +142,7 @@ TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_dbg_test
TEST_GEN_PROGS_x86 += x86/sev_init2_tests
TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
TEST_GEN_PROGS_x86 += x86/sev_smoke_test
@@ -210,6 +213,7 @@ TEST_GEN_PROGS_s390 += s390/keyop
TEST_GEN_PROGS_s390 += rseq_test
TEST_GEN_PROGS_s390 += s390/irq_routing
TEST_GEN_PROGS_s390 += mmu_stress_test
+TEST_GEN_PROGS_s390 += pre_fault_memory_test
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c
index 25b2e3222f68..ab57902ce429 100644
--- a/tools/testing/selftests/kvm/arm64/no-vgic.c
+++ b/tools/testing/selftests/kvm/arm64/no-vgic.c
@@ -159,6 +159,7 @@ static void guest_code_gicv5(void)
check_gicv5_gic_op(CDAFF);
check_gicv5_gic_op(CDDI);
check_gicv5_gic_op(CDDIS);
+ check_gicv5_gic_op(CDEN);
check_gicv5_gic_op(CDEOI);
check_gicv5_gic_op(CDHM);
check_gicv5_gic_op(CDPEND);
diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c
index d785b660d847..96cfd6bb32f6 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_v5.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c
@@ -20,8 +20,6 @@ struct vm_gic {
u32 gic_dev_type;
};
-static u64 max_phys_size;
-
#define GUEST_CMD_IRQ_CDIA 10
#define GUEST_CMD_IRQ_DIEOI 11
#define GUEST_CMD_IS_AWAKE 12
@@ -131,6 +129,8 @@ static void test_vgic_v5_ppis(u32 gic_dev_type)
while (1) {
ret = run_vcpu(vcpus[0]);
+ if (ret)
+ break;
switch (get_ucall(vcpus[0], &uc)) {
case UCALL_SYNC:
@@ -146,7 +146,7 @@ static void test_vgic_v5_ppis(u32 gic_dev_type)
irq = FIELD_PREP(KVM_ARM_IRQ_NUM_MASK, 3);
irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
- _kvm_irq_line(v.vm, irq, level);
+ kvm_irq_line(v.vm, irq, level);
} else if (uc.args[1] == GUEST_CMD_IS_AWAKE) {
pr_info("Guest skipping WFI due to pending IRQ\n");
} else if (uc.args[1] == GUEST_CMD_IRQ_CDIA) {
@@ -208,13 +208,9 @@ void run_tests(u32 gic_dev_type)
int main(int ac, char **av)
{
int ret;
- int pa_bits;
test_disable_default_vgic();
- pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
- max_phys_size = 1ULL << pa_bits;
-
ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V5);
if (ret) {
pr_info("No GICv5 support; Not running GIC_v5 tests.\n");
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 12446a4b6e8d..74ca096bf976 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -694,7 +694,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
for (iteration = 1; iteration <= p->iterations; iteration++) {
- unsigned long i;
+ unsigned long i, reap_i;
+
+ /*
+ * Select a random point in the time interval to reap the dirty
+ * bitmap/ring while the guest is running, i.e. randomize how
+ * long the guest gets to initially run and thus how many pages
+ * it can dirty, before collecting the dirty bitmap/ring. See
+ * the loop below for details.
+ */
+ reap_i = random() % p->interval;
+ printf("Reaping after a %lu ms delay\n", reap_i);
sync_global_to_guest(vm, iteration);
@@ -729,13 +739,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* that's effectively blocked. Collecting while the
* guest is running also verifies KVM doesn't lose any
* state.
- *
+ */
+ if (i < reap_i)
+ continue;
+
+ /*
* For bitmap modes, KVM overwrites the entire bitmap,
* i.e. collecting the bitmaps is destructive. Collect
- * the bitmap only on the first pass, otherwise this
- * test would lose track of dirty pages.
+ * the bitmap while the guest is running only once,
+ * otherwise this test would lose track of dirty pages.
*/
- if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ if (i > reap_i && host_log_mode != LOG_MODE_DIRTY_RING)
continue;
/*
@@ -745,7 +759,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* the ring on every pass would make it unlikely the
* vCPU would ever fill the fing).
*/
- if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ if (i > reap_i && !READ_ONCE(dirty_ring_vcpu_ring_full))
continue;
log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 832ef4dfb99f..2233d871a38f 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -345,6 +345,16 @@ static void test_invalid_punch_hole(int fd, size_t total_size)
}
}
+static void test_invalid_binding(struct kvm_vm *vm, int fd, size_t size)
+{
+ int r;
+
+ r = __vm_set_user_memory_region2(vm, 0, KVM_MEM_GUEST_MEMFD, 0, size, 0,
+ fd, ALIGN_DOWN(INT64_MAX, page_size));
+ TEST_ASSERT(r && errno == EINVAL,
+ "Memslot with out-of-range offset+size should fail");
+}
+
static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
u64 guest_memfd_flags)
{
@@ -408,17 +418,26 @@ static void test_guest_memfd_flags(struct kvm_vm *vm)
}
}
-#define __gmem_test(__test, __vm, __flags, __gmem_size) \
+#define ____gmem_test(__test, __vm, __flags, __gmem_size, args...) \
do { \
int fd = vm_create_guest_memfd(__vm, __gmem_size, __flags); \
\
- test_##__test(fd, __gmem_size); \
+ test_##__test(args); \
close(fd); \
} while (0)
+#define __gmem_test(__test, __vm, __flags, __gmem_size) \
+ ____gmem_test(__test, __vm, __flags, __gmem_size, fd, __gmem_size)
+
#define gmem_test(__test, __vm, __flags) \
__gmem_test(__test, __vm, __flags, page_size * 4)
+#define __gmem_test_vm(__test, __vm, __flags, __gmem_size) \
+ ____gmem_test(__test, __vm, __flags, __gmem_size, __vm, fd, __gmem_size)
+
+#define gmem_test_vm(__test, __vm, __flags) \
+ __gmem_test_vm(__test, __vm, __flags, page_size * 4)
+
static void __test_guest_memfd(struct kvm_vm *vm, u64 flags)
{
test_create_guest_memfd_multiple(vm);
@@ -447,6 +466,7 @@ static void __test_guest_memfd(struct kvm_vm *vm, u64 flags)
gmem_test(file_size, vm, flags);
gmem_test(fallocate, vm, flags);
gmem_test(invalid_punch_hole, vm, flags);
+ gmem_test_vm(invalid_binding, vm, flags);
}
static void test_guest_memfd(unsigned long vm_type)
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
index 067a4c9cf452..6cb3bed29b81 100644
--- a/tools/testing/selftests/kvm/include/kvm_syscalls.h
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -89,4 +89,10 @@ __KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, l
__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
__KVM_SYSCALL_DEFINE(madvise, 3, void *, addr, size_t, length, int, advice);
+#define kvm_free_fd(fd) \
+do { \
+ kvm_close(fd); \
+ (fd) = -1; \
+} while (0)
+
#endif /* SELFTEST_KVM_SYSCALLS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2ecaaa0e9965..04a910164a29 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -876,7 +876,7 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
{
int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
- TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vcpu->vm);
return fd;
}
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
index d32ff5d8ffd0..49c8139d398c 100644
--- a/tools/testing/selftests/kvm/include/lru_gen_util.h
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -14,7 +14,7 @@
#include "test_util.h"
#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
-#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+#define MAX_NR_NODES 32 /* Maximum number of nodes supported by the test */
#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
index 41a265742666..e5259f63be22 100644
--- a/tools/testing/selftests/kvm/include/s390/facility.h
+++ b/tools/testing/selftests/kvm/include/s390/facility.h
@@ -11,6 +11,7 @@
#ifndef SELFTEST_KVM_FACILITY_H
#define SELFTEST_KVM_FACILITY_H
+#include <linux/atomic.h>
#include <linux/bitops.h>
/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
@@ -19,6 +20,11 @@
extern u64 stfl_doublewords[NB_STFL_DOUBLEWORDS];
extern bool stfle_flag;
+static inline bool clear_bit_inv(unsigned long nr, unsigned long *ptr)
+{
+ return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
{
return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 77f576ee7789..513e4a1075fa 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -38,7 +38,24 @@ extern u64 guest_tsc_khz;
const char *ex_str(int vector);
-#define X86_EFLAGS_FIXED (1u << 1)
+#define X86_EFLAGS_CF BIT(0) /* Carry Flag */
+#define X86_EFLAGS_FIXED BIT(1) /* Bit 1 - always on */
+#define X86_EFLAGS_PF BIT(2) /* Parity Flag */
+#define X86_EFLAGS_AF BIT(4) /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF BIT(6) /* Zero Flag */
+#define X86_EFLAGS_SF BIT(7) /* Sign Flag */
+#define X86_EFLAGS_TF BIT(8) /* Trap Flag */
+#define X86_EFLAGS_IF BIT(9) /* Interrupt Flag */
+#define X86_EFLAGS_DF BIT(10) /* Direction Flag */
+#define X86_EFLAGS_OF BIT(11) /* Overflow Flag */
+#define X86_EFLAGS_IOPL BIT(12) /* I/O Privilege Level (2 bits) */
+#define X86_EFLAGS_NT BIT(14) /* Nested Task */
+#define X86_EFLAGS_RF BIT(16) /* Resume Flag */
+#define X86_EFLAGS_VM BIT(17) /* Virtual Mode */
+#define X86_EFLAGS_AC BIT(18) /* Alignment Check/Access Control */
+#define X86_EFLAGS_VIF BIT(19) /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP BIT(20) /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID BIT(21) /* CPUID detection */
#define X86_CR4_VME (1ul << 0)
#define X86_CR4_PVI (1ul << 1)
@@ -209,6 +226,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
+#define X86_FEATURE_GP_ON_USER_CPUID KVM_X86_CPU_FEATURE(0x80000021, 0, EAX, 17)
#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
@@ -1556,6 +1574,15 @@ u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa);
#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+#define EPT_VIOLATION_ACC_READ BIT(0)
+#define EPT_VIOLATION_ACC_WRITE BIT(1)
+#define EPT_VIOLATION_ACC_INSTR BIT(2)
+#define EPT_VIOLATION_PROT_READ BIT(3)
+#define EPT_VIOLATION_PROT_WRITE BIT(4)
+#define EPT_VIOLATION_PROT_EXEC BIT(5)
+#define EPT_VIOLATION_GVA_IS_VALID BIT(7)
+#define EPT_VIOLATION_GVA_TRANSLATED BIT(8)
+
bool sys_clocksource_is_based_on_tsc(void);
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 1af44c151d60..dec383e59a47 100644
--- a/tools/testing/selftests/kvm/include/x86/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -144,4 +144,28 @@ static inline void snp_launch_update_data(struct kvm_vm *vm, gpa_t gpa,
vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
}
+static inline void sev_dbg_crypt_memory(struct kvm_vm *vm, unsigned int cmd,
+ void *dst, void *src, unsigned int len)
+{
+ struct kvm_sev_dbg dbg = {
+ .src_uaddr = (unsigned long)src,
+ .dst_uaddr = (unsigned long)dst,
+ .len = len,
+ };
+
+ vm_sev_ioctl(vm, cmd, &dbg);
+}
+
+static inline void sev_decrypt_memory(struct kvm_vm *vm, void *dst, void *src,
+ unsigned int len)
+{
+ sev_dbg_crypt_memory(vm, KVM_SEV_DBG_DECRYPT, dst, src, len);
+}
+
+static inline void sev_encrypt_memory(struct kvm_vm *vm, void *dst, void *src,
+ unsigned int len)
+{
+ sev_dbg_crypt_memory(vm, KVM_SEV_DBG_ENCRYPT, dst, src, len);
+}
+
#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index fc5242fb956f..a910e3abb8c7 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -230,6 +230,7 @@ struct test_params {
u64 phys_offset;
u64 test_mem_size;
enum vm_mem_backing_src_type src_type;
+ bool misalign_slot_gpa;
};
static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
@@ -244,6 +245,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
u64 guest_num_pages;
u64 alignment;
void *host_test_mem;
+ struct userspace_mem_region *region;
struct kvm_vm *vm;
/* Align up the test memory size */
@@ -276,13 +278,22 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
/* Add an extra memory slot with specified backing src type */
vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
+ region = memslot2region(vm, TEST_MEM_SLOT_INDEX);
+ host_test_mem = region->host_mem;
+
+ if (p->misalign_slot_gpa) {
+ TEST_ASSERT(is_backing_src_hugetlb(src_type),
+ "Memslot GPA misalignment requires hugetlb backing");
+ TEST_ASSERT(guest_num_pages > 1,
+ "Need at least two guest pages to misalign memslot GPA");
+
+ guest_test_phys_mem += guest_page_size;
+ vm_mem_region_move(vm, TEST_MEM_SLOT_INDEX, guest_test_phys_mem);
+ }
/* Do mapping(GVA->GPA) for the testing memory slot */
virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
- /* Cache the HVA pointer of the region */
- host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem);
-
/* Export shared structure test_args to guest */
sync_global_to_guest(vm, test_args);
@@ -417,8 +428,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-p offset] [-m mode] "
- "[-b mem-size] [-v vcpus] [-s mem-type]\n", name);
+ printf("usage: %s [-h] [-p offset] [-m mode] [-b mem-size]\n", name);
+ printf(" [-v vcpus] [-s mem-type] [-u]\n");
puts("");
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
@@ -428,6 +439,8 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run\n"
" (default: 1)\n");
backing_src_help("-s");
+ printf(" -u: move the test memslot GPA by one guest page after creating\n"
+ " the memslot, forcing a hugetlb HVA/GPA offset mismatch\n");
puts("");
}
@@ -442,7 +455,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {
+ while ((opt = getopt(argc, argv, "hp:m:b:v:s:u")) != -1) {
switch (opt) {
case 'p':
p.phys_offset = strtoull(optarg, NULL, 0);
@@ -461,6 +474,9 @@ int main(int argc, char *argv[])
case 's':
p.src_type = parse_backing_src_type(optarg);
break;
+ case 'u':
+ p.misalign_slot_gpa = true;
+ break;
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e08967ef7b7b..195f3fdae1e3 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -77,7 +77,8 @@ static ssize_t get_module_param(const char *module_name, const char *param,
int fd, r;
/* Verify KVM is loaded, to provide a more helpful SKIP message. */
- close(open_kvm_dev_path_or_exit());
+ fd = open_kvm_dev_path_or_exit();
+ kvm_free_fd(fd);
r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
module_name, param);
@@ -90,8 +91,7 @@ static ssize_t get_module_param(const char *module_name, const char *param,
TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
path, bytes_read, buffer_size);
- r = close(fd);
- TEST_ASSERT(!r, "close(%s) failed", path);
+ kvm_free_fd(fd);
return bytes_read;
}
@@ -160,7 +160,7 @@ unsigned int kvm_check_cap(long cap)
ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
- close(kvm_fd);
+ kvm_free_fd(kvm_fd);
return (unsigned int)ret;
}
@@ -747,8 +747,7 @@ static void kvm_stats_release(struct kvm_binary_stats *stats)
stats->desc = NULL;
}
- kvm_close(stats->fd);
- stats->fd = -1;
+ kvm_free_fd(stats->fd);
}
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
@@ -777,7 +776,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
kvm_munmap(vcpu->run, vcpu_mmap_sz());
- kvm_close(vcpu->fd);
+ kvm_free_fd(vcpu->fd);
kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
@@ -793,8 +792,8 @@ void kvm_vm_release(struct kvm_vm *vmp)
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- kvm_close(vmp->fd);
- kvm_close(vmp->kvm_fd);
+ kvm_free_fd(vmp->fd);
+ kvm_free_fd(vmp->kvm_fd);
/* Free cached stats metadata and close FD */
kvm_stats_release(&vmp->stats);
@@ -815,10 +814,10 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
if (region->fd >= 0) {
/* There's an extra map when using shared memory. */
kvm_munmap(region->mmap_alias, region->mmap_size);
- close(region->fd);
+ kvm_free_fd(region->fd);
}
- if (region->region.guest_memfd >= 0)
- close(region->region.guest_memfd);
+ if ((int)region->region.guest_memfd >= 0)
+ kvm_free_fd(region->region.guest_memfd);
free(region);
}
@@ -1311,7 +1310,7 @@ static size_t vcpu_mmap_sz(void)
TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
- close(dev_fd);
+ kvm_free_fd(dev_fd);
return ret;
}
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index b51467d70f6e..4ca48de7a926 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -848,7 +848,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
/* Setup guest general purpose registers */
vcpu_regs_get(vcpu, &regs);
- regs.rflags = regs.rflags | 0x2;
+ regs.rflags = regs.rflags | X86_EFLAGS_FIXED;
regs.rsp = stack_gva;
vcpu_regs_set(vcpu, &regs);
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index 67642759e4a0..7c10ba6e6fb4 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -360,7 +360,7 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp)
vmwrite(GUEST_DR7, 0x400);
vmwrite(GUEST_RSP, (u64)rsp);
vmwrite(GUEST_RIP, (u64)rip);
- vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_RFLAGS, X86_EFLAGS_FIXED);
vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index e977e979470f..4d9ad6104a6e 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -111,6 +111,7 @@ struct sync_area {
*/
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
+static int wait_timeout = 10;
static sem_t vcpu_ready;
static bool map_unmap_verify;
@@ -418,7 +419,7 @@ static bool _guest_should_exit(void)
*/
static noinline void host_perform_sync(struct sync_area *sync)
{
- alarm(10);
+ alarm(wait_timeout);
atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
@@ -900,7 +901,7 @@ static void help(char *name, struct test_args *targs)
{
int ctr;
- pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
+ pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count] [-t wait_timeout]\n",
name);
pr_info(" -h: print this help screen.\n");
pr_info(" -v: enable verbose mode (not for benchmarking).\n");
@@ -916,6 +917,8 @@ static void help(char *name, struct test_args *targs)
targs->seconds);
pr_info(" -r: specify the number of runs per test (currently: %i)\n",
targs->runs);
+ pr_info(" -t: specify the number of seconds for host wait timeout (currently: %i)\n",
+ wait_timeout);
pr_info("\nAvailable tests:\n");
for (ctr = 0; ctr < NTESTS; ctr++)
@@ -964,7 +967,7 @@ static bool parse_args(int argc, char *argv[],
u32 max_mem_slots;
int opt;
- while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
+ while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:t:")) != -1) {
switch (opt) {
case 'h':
default:
@@ -1007,6 +1010,9 @@ static bool parse_args(int argc, char *argv[],
case 'r':
targs->runs = atoi_positive("Runs per test", optarg);
break;
+ case 't':
+ wait_timeout = atoi_positive("Host wait timeout", optarg);
+ break;
}
}
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index fcb57fd034e6..a0fcae3cb7a8 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -11,6 +11,7 @@
#include <kvm_util.h>
#include <processor.h>
#include <pthread.h>
+#include <ucall_common.h>
/* Arbitrarily chosen values */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
@@ -167,7 +168,6 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
.type = vm_type,
};
struct kvm_vcpu *vcpu;
- struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
@@ -193,11 +193,6 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
vcpu_args_set(vcpu, 1, gva);
vcpu_run(vcpu);
- run = vcpu->run;
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
- run->exit_reason, exit_reason_str(run->exit_reason));
-
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 8d6fdb5d38b8..cb86cb6b3635 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -27,6 +27,7 @@ enum {
};
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
+static bool sbi_ext_enabled[KVM_RISCV_SBI_EXT_MAX];
bool filter_reg(__u64 reg)
{
@@ -149,6 +150,14 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA];
+ /*
+ * FWFT misaligned delegation registers are always visible when the SBI FWFT
+ * extension is enable and the host supports the misaligned delegation.
+ */
+ case KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable):
+ case KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags):
+ case KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value):
+ return sbi_ext_enabled[KVM_RISCV_SBI_EXT_FWFT];
default:
break;
}
@@ -193,6 +202,27 @@ static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_subli
return 0;
}
+void check_fwft_feature(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, u64 feature)
+{
+ unsigned long value;
+ int rc;
+
+ /* Enable SBI FWFT extension so that we can check the supported register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return;
+
+ for (int i = 0; i < s->regs_n; i++) {
+ if ((s->regs[i] & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_STATE) {
+ rc = __vcpu_get_reg(vcpu, s->regs[i], &value);
+ __TEST_REQUIRE(!rc, "%s not available, skipping tests", s->name);
+ }
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+}
+
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -235,6 +265,9 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
break;
case VCPU_FEATURE_SBI_EXT:
feature = RISCV_SBI_EXT_REG(s->feature);
+ if (s->feature == KVM_RISCV_SBI_EXT_FWFT)
+ check_fwft_feature(vcpu, s, feature);
+ sbi_ext_enabled[s->feature] = true;
break;
default:
TEST_FAIL("Unknown feature type");
@@ -897,11 +930,15 @@ static __u64 sbi_sta_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi),
};
-static __u64 sbi_fwft_regs[] = {
+static __u64 sbi_fwft_misaligned_deleg_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value),
+};
+
+static __u64 sbi_fwft_pointer_masking_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value),
@@ -1013,7 +1050,7 @@ static __u64 fp_d_regs[] = {
};
/* Define a default vector registers with length. This will be overwritten at runtime */
-static __u64 vector_regs[] = {
+static __u64 v_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
@@ -1057,37 +1094,17 @@ static __u64 vector_regs[] = {
#define SUBLIST_BASE \
{"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
.skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
-#define SUBLIST_SBI_BASE \
- {"sbi-base", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_V01, \
- .regs = sbi_base_regs, .regs_n = ARRAY_SIZE(sbi_base_regs),}
-#define SUBLIST_SBI_STA \
- {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \
- .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),}
-#define SUBLIST_SBI_FWFT \
- {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \
- .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),}
-#define SUBLIST_ZICBOM \
- {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
-#define SUBLIST_ZICBOP \
- {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),}
-#define SUBLIST_ZICBOZ \
- {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
-#define SUBLIST_AIA \
- {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
-#define SUBLIST_SMSTATEEN \
- {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),}
-#define SUBLIST_FP_F \
- {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
- .regs_n = ARRAY_SIZE(fp_f_regs),}
-#define SUBLIST_FP_D \
- {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
- .regs_n = ARRAY_SIZE(fp_d_regs),}
-
-#define SUBLIST_V \
- {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
+#define SUBLIST_ISA(ext, extu) \
+ { \
+ .name = #ext, \
+ .feature = KVM_RISCV_ISA_EXT_##extu, \
+ .regs = ext##_regs, \
+ .regs_n = ARRAY_SIZE(ext##_regs), \
+ }
#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
-static __u64 regs_##ext[] = { \
+static __u64 ext##_regs[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | \
KVM_RISCV_ISA_EXT_##extu, \
@@ -1095,18 +1112,22 @@ static __u64 regs_##ext[] = { \
static struct vcpu_reg_list config_##ext = { \
.sublists = { \
SUBLIST_BASE, \
- { \
- .name = #ext, \
- .feature = KVM_RISCV_ISA_EXT_##extu, \
- .regs = regs_##ext, \
- .regs_n = ARRAY_SIZE(regs_##ext), \
- }, \
+ SUBLIST_ISA(ext, extu), \
{0}, \
}, \
} \
+#define SUBLIST_SBI(ext, extu) \
+ { \
+ .name = "sbi-"#ext, \
+ .feature_type = VCPU_FEATURE_SBI_EXT, \
+ .feature = KVM_RISCV_SBI_EXT_##extu, \
+ .regs = sbi_##ext##_regs, \
+ .regs_n = ARRAY_SIZE(sbi_##ext##_regs), \
+ }
+
#define KVM_SBI_EXT_SIMPLE_CONFIG(ext, extu) \
-static __u64 regs_sbi_##ext[] = { \
+static __u64 sbi_##ext##_regs[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | \
KVM_RISCV_SBI_EXT_##extu, \
@@ -1114,13 +1135,7 @@ static __u64 regs_sbi_##ext[] = { \
static struct vcpu_reg_list config_sbi_##ext = { \
.sublists = { \
SUBLIST_BASE, \
- { \
- .name = "sbi-"#ext, \
- .feature_type = VCPU_FEATURE_SBI_EXT, \
- .feature = KVM_RISCV_SBI_EXT_##extu, \
- .regs = regs_sbi_##ext, \
- .regs_n = ARRAY_SIZE(regs_sbi_##ext), \
- }, \
+ SUBLIST_SBI(ext, extu), \
{0}, \
}, \
} \
@@ -1129,7 +1144,7 @@ static struct vcpu_reg_list config_sbi_##ext = { \
static struct vcpu_reg_list config_##ext = { \
.sublists = { \
SUBLIST_BASE, \
- SUBLIST_##extu, \
+ SUBLIST_ISA(ext, extu), \
{0}, \
}, \
} \
@@ -1138,24 +1153,23 @@ static struct vcpu_reg_list config_##ext = { \
static struct vcpu_reg_list config_sbi_##ext = { \
.sublists = { \
SUBLIST_BASE, \
- SUBLIST_SBI_##extu, \
+ SUBLIST_SBI(ext, extu), \
{0}, \
}, \
} \
/* Note: The below list is alphabetically sorted. */
-KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE);
+KVM_SBI_EXT_SUBLIST_CONFIG(base, V01);
KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
-KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
-KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
-KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
-KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(aia, SSAIA);
+KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, F);
+KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, D);
KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
@@ -1228,6 +1242,23 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED);
KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH);
KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT);
+static struct vcpu_reg_list config_sbi_fwft_misaligned_deleg = {
+ .sublists = {
+ SUBLIST_BASE,
+ SUBLIST_SBI(fwft_misaligned_deleg, FWFT),
+ {0},
+ },
+};
+
+static struct vcpu_reg_list config_sbi_fwft_pointer_masking = {
+ .sublists = {
+ SUBLIST_BASE,
+ SUBLIST_ISA(smnpm, SMNPM),
+ SUBLIST_SBI(fwft_pointer_masking, FWFT),
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_base,
&config_sbi_sta,
@@ -1235,7 +1266,8 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_dbcn,
&config_sbi_susp,
&config_sbi_mpxy,
- &config_sbi_fwft,
+ &config_sbi_fwft_misaligned_deleg,
+ &config_sbi_fwft_pointer_masking,
&config_aia,
&config_fp_f,
&config_fp_d,
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index e39a724fe860..15d81b2ed7ad 100644
--- a/tools/testing/selftests/kvm/s390/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -34,16 +34,22 @@ static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
/**
* Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
* so use_cmma goes on and the CMMA related ioctls do something.
+ * Touch the page at offset 1M inside TEST_DATA to make sure its page
+ * tables are allocated in the host.
*/
static void guest_do_one_essa(void)
{
asm volatile(
/* load TEST_DATA_START_GFN into r1 */
+ " xgr 1,1\n"
" llilf 1,%[start_gfn]\n"
/* calculate the address from the gfn */
" sllg 1,1,12(0)\n"
/* set the first page in TEST_DATA memslot to STABLE */
" .insn rrf,0xb9ab0000,2,1,1,0\n"
+ " agfi 1,0x100000\n"
+ /* also touch the first page of the second MB of TEST_DATA */
+ " .insn rrf,0xb9ab0000,2,1,1,0\n"
/* hypercall */
" diag 0,0,0x501\n"
"0: j 0b"
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
index 714906c1d12a..b24c1f9dbbe8 100644
--- a/tools/testing/selftests/kvm/s390/user_operexec.c
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -6,6 +6,7 @@
* Authors:
* Janosch Frank <frankja@linux.ibm.com>
*/
+#include "facility.h"
#include "kselftest.h"
#include "kvm_util.h"
#include "test_util.h"
@@ -109,6 +110,111 @@ static void test_user_operexec_combined(void)
kvm_vm_free(vm);
}
+static struct kvm_vm *create_vm_without_sthyi(void)
+{
+ struct kvm_s390_vm_cpu_processor info;
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+
+ kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+
+ clear_bit_inv(74, (unsigned long *)&info.fac_list);
+ kvm_device_attr_set(vm->fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+
+ return vm;
+}
+
+static void test_user_instr0_no_stfle_74(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = create_vm_without_sthyi();
+
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code_instr0);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0000);
+
+ kvm_vm_free(vm);
+}
+
+static void test_user_operexec_no_stfle_74(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = create_vm_without_sthyi();
+
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code_user_operexec);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+static void test_instr0_combined_no_stfle_74(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = create_vm_without_sthyi();
+
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code_instr0);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0000);
+
+ kvm_vm_free(vm);
+}
+
+static void test_operexec_combined_no_stfle_74(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = create_vm_without_sthyi();
+
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code_user_operexec);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
/*
* Run all tests above.
*
@@ -122,6 +228,10 @@ static struct testdef {
{ "instr0", test_user_instr0 },
{ "operexec", test_user_operexec },
{ "operexec_combined", test_user_operexec_combined},
+ { "instr0_no_stfle_74", test_user_instr0_no_stfle_74 },
+ { "instr0_combined_no_stfle_74", test_instr0_combined_no_stfle_74 },
+ { "operexec_combined_no_stfle_74", test_operexec_combined_no_stfle_74 },
+ { "operexec_no_stfle_74", test_user_operexec_no_stfle_74 },
};
int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index e639a9db51ee..a152ab65c657 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -510,7 +510,7 @@ static void test_add_overlapping_private_memory_regions(void)
vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
- memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0);
+ memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 5, 0);
vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0);
@@ -526,12 +526,35 @@ static void test_add_overlapping_private_memory_regions(void)
vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
MEM_REGION_GPA, 0, NULL, -1, 0);
- /* Overlap the front half of the other slot. */
+ /*
+ * Verify that overlap in the guest_memfd bindings (i.e. in guest_memfd
+ * file offsets), but _not_ in the GPA space, fails with -EEXIST.
+ */
+ r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA,
+ MEM_REGION_SIZE * 2,
+ 0, memfd, MEM_REGION_SIZE);
+ TEST_ASSERT(r == -1 && errno == EEXIST,
+ "Overlapping guest_memfd() bindings should fail with EEXIST");
+
+ /* And now the back half of the other slot's guest_memfd binding. */
+ r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA,
+ MEM_REGION_SIZE * 2,
+ 0, memfd, MEM_REGION_SIZE * 3);
+ TEST_ASSERT(r == -1 && errno == EEXIST,
+ "Overlapping guest_memfd() bindings should fail with EEXIST");
+
+ /*
+ * Repeat the overlap tests, but this time with overlap in the memslots
+ * GPA space. Regardless of where there is overlap, KVM should return
+ * -EEXIST.
+ */
r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
MEM_REGION_GPA * 2 - MEM_REGION_SIZE,
MEM_REGION_SIZE * 2,
0, memfd, 0);
- TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
+ TEST_ASSERT(r == -1 && errno == EEXIST,
"Overlapping guest_memfd() bindings should fail with EEXIST");
/* And now the back half of the other slot. */
@@ -539,7 +562,7 @@ static void test_add_overlapping_private_memory_regions(void)
MEM_REGION_GPA * 2 + MEM_REGION_SIZE,
MEM_REGION_SIZE * 2,
0, memfd, 0);
- TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
+ TEST_ASSERT(r == -1 && errno == EEXIST,
"Overlapping guest_memfd() bindings should fail with EEXIST");
close(memfd);
diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
index 0dfaf03cd0a0..2a2ef3e179ff 100644
--- a/tools/testing/selftests/kvm/x86/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
@@ -15,10 +15,51 @@
#define IRQ_VECTOR 0xAA
+#define CAST_TO_RIP(v) ((unsigned long long)&(v))
+
/* For testing data access debug BP */
u32 guest_value;
extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+extern unsigned char fep_bd_start, fep_sti_start, fep_sti_end;
+
+static int irqs_received;
+
+static void guest_db_handler(struct ex_regs *regs)
+{
+ static int count;
+ unsigned long target_rips[2] = {
+ CAST_TO_RIP(fep_sti_start),
+ CAST_TO_RIP(fep_sti_end),
+ };
+
+ __GUEST_ASSERT(regs->rip == target_rips[count],
+ "STI[%u]: unexpected rip 0x%lx (should be 0x%lx)",
+ count, regs->rip, target_rips[count]);
+ regs->rflags &= ~X86_EFLAGS_TF;
+ count++;
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ /*
+ * The pending IRQ should finally be take when KVM_GUESTDBG_BLOCKIRQ is
+ * cleared and IRQs are enabled. Note, the IRQ is expected to arrive
+ * on the instruction immediately after STI, even though its in an STI
+ * shadow. Because the next instruction has a coincident #DB, and #DBs
+ * are not subject to STI-blocking, the #DB will push RFLAGS.IF=1 on
+ * the stack, and the eventual IRET will unmask IRQs and obliterate the
+ * STI shadow in the process.
+ */
+ unsigned long target_rip = CAST_TO_RIP(fep_sti_start);
+
+ __GUEST_ASSERT(regs->rip == target_rip,
+ "IRQ: unexpected rip 0x%lx (should be 0x%lx)",
+ regs->rip, target_rip);
+
+ irqs_received++;
+ x2apic_write_reg(APIC_EOI, 0);
+}
static void guest_code(void)
{
@@ -64,11 +105,33 @@ static void guest_code(void)
/* DR6.BD test */
asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+
+ /*
+ * Note, the IRET from the #DB that occurs in the below STI-shadow will
+ * unmask IRQs, i.e. the pending interrupt will be delivered after #DB
+ * handling, on the CLI!
+ */
+ if (is_forced_emulation_enabled) {
+ asm volatile(KVM_FEP "fep_bd_start: mov %%dr0, %%rax" : : : "rax");
+
+ /* pending debug exceptions for emulation */
+ asm volatile("pushf\n\t"
+ "orq $" __stringify(X86_EFLAGS_TF) ", (%rsp)\n\t"
+ "popf\n\t"
+ "sti\n\t"
+ "fep_sti_start:"
+ "cli\n\t"
+ "pushf\n\t"
+ "orq $" __stringify(X86_EFLAGS_TF) ", (%rsp)\n\t"
+ "popf\n\t"
+ KVM_FEP "sti\n\t"
+ "fep_sti_end:"
+ "cli\n\t");
+ GUEST_ASSERT(irqs_received == 1);
+ }
GUEST_DONE();
}
-#define CAST_TO_RIP(v) ((unsigned long long)&(v))
-
static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
{
struct kvm_regs regs;
@@ -185,7 +248,7 @@ int main(void)
target_dr6);
}
- /* Finally test global disable */
+ /* test global disable */
memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[7] = 0x400 | DR7_GD;
@@ -202,10 +265,29 @@ int main(void)
run->debug.arch.pc, target_rip, run->debug.arch.dr6,
target_dr6);
+ /* test global disable in emulation */
+ if (is_forced_emulation_enabled) {
+ /* Skip the 3-bytes "mov dr0" */
+ vcpu_skip_insn(vcpu, 3);
+ vcpu_run(vcpu);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(fep_bd_start) &&
+ run->debug.arch.dr6 == target_dr6,
+ "DR7.GD: exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(fep_bd_start),
+ run->debug.arch.dr6, target_dr6);
+ }
+
/* Disable all debug controls, run to the end */
memset(&debug, 0, sizeof(debug));
vcpu_guest_debug_set(vcpu, &debug);
+ vm_install_exception_handler(vm, DB_VECTOR, guest_db_handler);
+ vm_install_exception_handler(vm, IRQ_VECTOR, guest_irq_handler);
+
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
cmd = get_ucall(vcpu, &uc);
diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
index 8e20a03b3329..53b7971aa072 100644
--- a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
@@ -11,12 +11,17 @@
void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
{
const u64 ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
- const u64 valid = BIT_ULL(18) | BIT_ULL(24);
- const u64 legal = ignored | valid;
+ u64 valid = BIT_ULL(18) | BIT_ULL(24);
u64 val = BIT_ULL(bit);
u64 actual;
+ u64 legal;
int r;
+ if (kvm_cpu_has(X86_FEATURE_GP_ON_USER_CPUID))
+ valid |= BIT_ULL(35);
+
+ legal = ignored | valid;
+
r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
TEST_ASSERT(val & ~legal ? !r : r == 1,
"Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 7347f1fe5157..2effde85c4c8 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -26,6 +26,7 @@ struct msr_data {
bool fault_expected;
bool write;
u64 write_val;
+ bool reset_expected;
};
struct hcall_data {
@@ -267,14 +268,9 @@ static void guest_test_msrs_access(void)
case 16:
msr->idx = HV_X64_MSR_RESET;
msr->write = true;
- /*
- * TODO: the test only writes '0' to HV_X64_MSR_RESET
- * at the moment, writing some other value there will
- * trigger real vCPU reset and the code is not prepared
- * to handle it yet.
- */
- msr->write_val = 0;
+ msr->write_val = 1;
msr->fault_expected = false;
+ msr->reset_expected = true;
break;
case 17:
@@ -457,7 +453,7 @@ static void guest_test_msrs_access(void)
msr->fault_expected = true;
break;
case 45:
- /* MSR is vailable when CPUID feature bit is set */
+ /* MSR is available when CPUID feature bit is set */
if (!has_invtsc)
goto next_stage;
vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
@@ -497,6 +493,15 @@ static void guest_test_msrs_access(void)
msr->idx, msr->write ? "write" : "read");
vcpu_run(vcpu);
+
+ if (msr->reset_expected) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SYSTEM_EVENT);
+ TEST_ASSERT(vcpu->run->system_event.type == KVM_SYSTEM_EVENT_RESET,
+ "Expected reset system event, got type %u",
+ vcpu->run->system_event.type);
+ goto next_stage;
+ }
+
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 15ee8b7bfc11..b4be9a175379 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -142,17 +142,6 @@ static void swap_two_test_pages(gpa_t pte_gva1, gpa_t pte_gva2)
}
/*
- * TODO: replace the silly NOP loop with a proper udelay() implementation.
- */
-static inline void do_delay(void)
-{
- int i;
-
- for (i = 0; i < 1000000; i++)
- asm volatile("nop");
-}
-
-/*
* Prepare to test: 'disable' workers by setting the expectation to '0',
* clear hypercall input page and then swap two test pages.
*/
@@ -169,7 +158,7 @@ static inline void prepare_to_test(struct test_data *data)
wmb();
/* Make sure workers have enough time to notice */
- do_delay();
+ udelay(100);
/* Swap test page mappings */
swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
@@ -189,7 +178,7 @@ static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
/* Make sure workers have enough time to test */
- do_delay();
+ udelay(100);
}
#define TESTVAL1 0x0101010101010101
diff --git a/tools/testing/selftests/kvm/x86/nested_tdp_fault_test.c b/tools/testing/selftests/kvm/x86/nested_tdp_fault_test.c
new file mode 100644
index 000000000000..fa95568f55ff
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_tdp_fault_test.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum test_type {
+ TEST_FINAL_PAGE_UNMAPPED, /* Final data page not present */
+ TEST_PT_PAGE_UNMAPPED, /* Page table page not present */
+ TEST_FINAL_PAGE_WRITE_PROTECTED, /* Final data page read-only */
+ TEST_PT_PAGE_WRITE_PROTECTED, /* Page table page read-only */
+};
+
+static gva_t l2_test_page;
+static void (*l2_entry)(void);
+
+#define TEST_IO_PORT 0x80
+#define TEST1_VADDR 0x8000000ULL
+#define TEST2_VADDR 0x10000000ULL
+#define TEST3_VADDR 0x18000000ULL
+#define TEST4_VADDR 0x20000000ULL
+
+/*
+ * L2 executes OUTS reading from l2_test_page, triggering a nested page
+ * fault on the read access.
+ */
+static void l2_guest_code_outs(void)
+{
+ asm volatile("outsb" ::"S"(l2_test_page), "d"(TEST_IO_PORT) : "memory");
+ GUEST_FAIL("L2 should not reach here");
+}
+
+/*
+ * L2 executes INS writing to l2_test_page, triggering a nested page
+ * fault on the write access.
+ */
+static void l2_guest_code_ins(void)
+{
+ asm volatile("insb" ::"D"(l2_test_page), "d"(TEST_IO_PORT) : "memory");
+ GUEST_FAIL("L2 should not reach here");
+}
+
+#define GUEST_ASSERT_EXIT_QUAL(ac_eq, ex_eq) \
+ __GUEST_ASSERT((ac_eq) == (ex_eq), \
+ "Wanted EXIT_QUAL '0x%lx', got '0x%lx'", ex_eq, ac_eq)
+
+static void l1_vmx_code(struct vmx_pages *vmx, u64 expected_fault_gpa,
+ u64 test_type)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 exit_qual;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_entry, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* Verify we got an EPT violation exit */
+ __GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION,
+ "Expected EPT violation (0x%x), got 0x%lx",
+ EXIT_REASON_EPT_VIOLATION,
+ vmreadz(VM_EXIT_REASON));
+
+ __GUEST_ASSERT(vmreadz(GUEST_PHYSICAL_ADDRESS) == expected_fault_gpa,
+ "Expected guest_physical_address = 0x%lx, got 0x%lx",
+ expected_fault_gpa,
+ vmreadz(GUEST_PHYSICAL_ADDRESS));
+
+ exit_qual = vmreadz(EXIT_QUALIFICATION);
+
+ /*
+ * Note, EPT page table accesses are always read+write, e.g. so that
+ * the CPU can do A/D updates at-will.
+ */
+ switch (test_type) {
+ case TEST_FINAL_PAGE_UNMAPPED:
+ GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_READ |
+ EPT_VIOLATION_GVA_IS_VALID |
+ EPT_VIOLATION_GVA_TRANSLATED);
+ break;
+ case TEST_PT_PAGE_UNMAPPED:
+ GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_READ |
+ EPT_VIOLATION_ACC_WRITE |
+ EPT_VIOLATION_GVA_IS_VALID);
+ break;
+ case TEST_FINAL_PAGE_WRITE_PROTECTED:
+ GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_WRITE |
+ EPT_VIOLATION_PROT_READ |
+ EPT_VIOLATION_PROT_EXEC |
+ EPT_VIOLATION_GVA_IS_VALID |
+ EPT_VIOLATION_GVA_TRANSLATED);
+ break;
+ case TEST_PT_PAGE_WRITE_PROTECTED:
+ GUEST_ASSERT_EXIT_QUAL(exit_qual, EPT_VIOLATION_ACC_READ |
+ EPT_VIOLATION_ACC_WRITE |
+ EPT_VIOLATION_PROT_READ |
+ EPT_VIOLATION_PROT_EXEC |
+ EPT_VIOLATION_GVA_IS_VALID);
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+#define GUEST_ASSERT_NPF_EC(ac_ec, ex_ec) \
+ __GUEST_ASSERT((ac_ec) == (ex_ec), \
+ "Wanted NPF error code '0x%lx', got '0x%lx'", (u64)(ex_ec), ac_ec)
+
+
+static void l1_svm_code(struct svm_test_data *svm, u64 expected_fault_gpa,
+ u64 test_type)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ u64 exit_info_1;
+
+ generic_svm_setup(svm, l2_entry,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* Verify we got an NPF exit */
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_NPF,
+ "Expected NPF exit (0x%x), got 0x%lx", SVM_EXIT_NPF,
+ vmcb->control.exit_code);
+
+ __GUEST_ASSERT(vmcb->control.exit_info_2 == expected_fault_gpa,
+ "Expected exit_info_2 = 0x%lx, got 0x%lx",
+ expected_fault_gpa,
+ vmcb->control.exit_info_2);
+
+ exit_info_1 = vmcb->control.exit_info_1;
+
+ /*
+ * Note, without GMET enabled, NPT walks are always user accesses. And
+ * like EPT, page table accesses are always read+write.
+ */
+ switch (test_type) {
+ case TEST_FINAL_PAGE_UNMAPPED:
+ GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_USER_MASK |
+ PFERR_GUEST_FINAL_MASK);
+ break;
+ case TEST_PT_PAGE_UNMAPPED:
+ GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_WRITE_MASK |
+ PFERR_USER_MASK |
+ PFERR_GUEST_PAGE_MASK);
+ break;
+ case TEST_FINAL_PAGE_WRITE_PROTECTED:
+ GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_PRESENT_MASK |
+ PFERR_WRITE_MASK |
+ PFERR_USER_MASK |
+ PFERR_GUEST_FINAL_MASK);
+ break;
+ case TEST_PT_PAGE_WRITE_PROTECTED:
+ GUEST_ASSERT_NPF_EC(exit_info_1, PFERR_PRESENT_MASK |
+ PFERR_WRITE_MASK |
+ PFERR_USER_MASK |
+ PFERR_GUEST_PAGE_MASK);
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data, u64 expected_fault_gpa,
+ u64 test_type)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data, expected_fault_gpa, test_type);
+ else
+ l1_svm_code(data, expected_fault_gpa, test_type);
+}
+
+/* Returns the GPA of the PT page that maps @vaddr. */
+static u64 get_pt_gpa_for_vaddr(struct kvm_vm *vm, u64 vaddr)
+{
+ u64 *pte;
+
+ pte = vm_get_pte(vm, vaddr);
+ TEST_ASSERT(pte && (*pte & 0x1), "PTE not present for vaddr 0x%lx",
+ (unsigned long)vaddr);
+
+ return addr_hva2gpa(vm, (void *)((u64)pte & ~0xFFFULL));
+}
+
+static void run_test(enum test_type type)
+{
+ gpa_t expected_fault_gpa;
+ gva_t nested_gva;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_tdp(vm);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
+
+ switch (type) {
+ case TEST_FINAL_PAGE_UNMAPPED:
+ /*
+ * Unmap the final data page from NPT/EPT. The guest page
+ * table walk succeeds, but the final GPA->HPA translation
+ * fails. L2 reads from the page via OUTS.
+ */
+ l2_entry = l2_guest_code_outs;
+ l2_test_page = vm_alloc(vm, vm->page_size, TEST1_VADDR);
+ expected_fault_gpa = addr_gva2gpa(vm, l2_test_page);
+ break;
+ case TEST_PT_PAGE_UNMAPPED:
+ /*
+ * Unmap a page table page from NPT/EPT. The hardware page
+ * table walk fails when translating the PT page's GPA
+ * through NPT/EPT. L2 reads from the page via OUTS.
+ */
+ l2_entry = l2_guest_code_outs;
+ l2_test_page = vm_alloc(vm, vm->page_size, TEST2_VADDR);
+ expected_fault_gpa = get_pt_gpa_for_vaddr(vm, l2_test_page);
+ break;
+ case TEST_FINAL_PAGE_WRITE_PROTECTED:
+ /*
+ * Write-protect the final data page in NPT/EPT. The page
+ * is present and readable, but not writable. L2 writes to
+ * the page via INS, triggering a protection violation.
+ */
+ l2_entry = l2_guest_code_ins;
+ l2_test_page = vm_alloc(vm, vm->page_size, TEST3_VADDR);
+ expected_fault_gpa = addr_gva2gpa(vm, l2_test_page);
+ break;
+ case TEST_PT_PAGE_WRITE_PROTECTED:
+ /*
+ * Write-protect a page table page in NPT/EPT. The page is
+ * present and readable, but not writable. The guest page
+ * table walk needs write access to set A/D bits, so it
+ * triggers a protection violation on the PT page.
+ * L2 reads from the page via OUTS.
+ */
+ l2_entry = l2_guest_code_outs;
+ l2_test_page = vm_alloc(vm, vm->page_size, TEST4_VADDR);
+ expected_fault_gpa = get_pt_gpa_for_vaddr(vm, l2_test_page);
+ break;
+ }
+
+ tdp_identity_map_default_memslots(vm);
+
+ if (type == TEST_FINAL_PAGE_WRITE_PROTECTED ||
+ type == TEST_PT_PAGE_WRITE_PROTECTED)
+ *tdp_get_pte(vm, expected_fault_gpa) &= ~PTE_WRITABLE_MASK(&vm->stage2_mmu);
+ else
+ *tdp_get_pte(vm, expected_fault_gpa) &= ~(PTE_PRESENT_MASK(&vm->stage2_mmu) |
+ PTE_READABLE_MASK(&vm->stage2_mmu) |
+ PTE_WRITABLE_MASK(&vm->stage2_mmu) |
+ PTE_EXECUTABLE_MASK(&vm->stage2_mmu));
+
+ sync_global_to_guest(vm, l2_entry);
+ sync_global_to_guest(vm, l2_test_page);
+ vcpu_args_set(vcpu, 3, nested_gva, expected_fault_gpa, (u64)type);
+
+ /*
+ * For the INS-based write test, KVM emulates the instruction and
+ * first reads from the I/O port, which exits to userspace.
+ * Re-enter the guest so emulation can proceed to the memory
+ * write, where the nested page fault is triggered.
+ */
+ for (;;) {
+ vcpu_run(vcpu);
+
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ vcpu->run->io.port == TEST_IO_PORT &&
+ vcpu->run->io.direction == KVM_EXIT_IO_IN) {
+ continue;
+ }
+ break;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected exit reason: %d", vcpu->run->exit_reason);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_cpu_has_tdp());
+
+ run_test(TEST_FINAL_PAGE_UNMAPPED);
+ run_test(TEST_PT_PAGE_UNMAPPED);
+ run_test(TEST_FINAL_PAGE_WRITE_PROTECTED);
+ run_test(TEST_PT_PAGE_WRITE_PROTECTED);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c1232344fda8..84e4c6ca67a3 100644
--- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
@@ -731,6 +731,8 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
static void intel_run_fixed_counter_guest_code(u8 idx)
{
+ u8 nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
for (;;) {
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
@@ -738,6 +740,10 @@ static void intel_run_fixed_counter_guest_code(u8 idx)
/* Only OS_EN bit is enabled for fixed counter[idx]. */
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+ if (nr_fixed_counters > 1)
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL,
+ FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL) |
+ FIXED_PMC_CTRL((idx + 1) % nr_fixed_counters, FIXED_PMC_KERNEL));
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
diff --git a/tools/testing/selftests/kvm/x86/sev_dbg_test.c b/tools/testing/selftests/kvm/x86/sev_dbg_test.c
new file mode 100644
index 000000000000..a9d8e4c059f9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/sev_dbg_test.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+
+#define BUFFER_SIZE (PAGE_SIZE * 2)
+
+static u8 *data;
+static u8 src[BUFFER_SIZE] __aligned(PAGE_SIZE);
+static u8 dst[BUFFER_SIZE] __aligned(PAGE_SIZE);
+
+static void validate_dst(int i, int nr_bytes, u8 pattern)
+{
+ for ( ; i < nr_bytes; i++)
+ TEST_ASSERT(dst[i] == pattern,
+ "Expected 0x%x at byte %u, got 0x%x",
+ pattern, i, dst[i]);
+}
+
+static void validate_buffers(void)
+{
+ int i;
+
+ for (i = 0; i < BUFFER_SIZE; i++)
+ TEST_ASSERT(src[i] == dst[i],
+ "Expected src[%u] (0x%x) == dst[%u] (0x%x)",
+ i, src[i], i, dst[i]);
+}
+
+static void ____test_sev_dbg(struct kvm_vm *vm, int i, int j, int nr_bytes)
+{
+ u8 pattern = guest_random_u32(&guest_rng);
+
+ if (i + nr_bytes > BUFFER_SIZE || j + nr_bytes > BUFFER_SIZE)
+ return;
+
+ memset(&src[i], pattern, nr_bytes);
+ sev_encrypt_memory(vm, &data[j], &src[i], nr_bytes);
+ sev_decrypt_memory(vm, &dst[i], &data[j], nr_bytes);
+ validate_buffers();
+ validate_dst(i, nr_bytes, pattern);
+}
+
+static void __test_sev_dbg(struct kvm_vm *vm, int nr_bytes)
+{
+ /*
+ * In a perfect world, all sizes at all combinations within the buffers
+ * would be tested. In reality, even this much testing is quite slow.
+ * Target sizes and offsets around the chunk (16 bytes) and page (4096
+ * bytes) sizes.
+ */
+ int x[] = { 1, 8, 15, 16, 23 };
+ int p = PAGE_SIZE - 24;
+ int i, j;
+
+ ____test_sev_dbg(vm, 0, 0, nr_bytes);
+
+ for (i = 0; i < ARRAY_SIZE(x); i++) {
+ for (j = 0; j < ARRAY_SIZE(x); j++) {
+ ____test_sev_dbg(vm, x[i], x[j], nr_bytes);
+ ____test_sev_dbg(vm, x[i], p + x[j], nr_bytes);
+ ____test_sev_dbg(vm, p + x[i], x[j], nr_bytes);
+ ____test_sev_dbg(vm, p + x[i], p + x[j], nr_bytes);
+ }
+ }
+}
+
+static void test_sev_dbg(u32 type, u64 policy)
+{
+ int sizes[] = { 1, 8, 15, 16, 17, 32, 33 };
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ if (!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(type)))
+ return;
+
+ vm = vm_sev_create_with_one_vcpu(type, NULL, &vcpu);
+
+ data = addr_gva2hva(vm, vm_alloc(vm, BUFFER_SIZE, KVM_UTIL_MIN_VADDR));
+ memset(data, 0xaa, BUFFER_SIZE);
+
+ vm_sev_launch(vm, policy, NULL);
+
+ sev_decrypt_memory(vm, dst, data, BUFFER_SIZE);
+ validate_dst(0, BUFFER_SIZE, 0xaa);
+
+ memset(src, 0x55, BUFFER_SIZE);
+ sev_encrypt_memory(vm, data, src, BUFFER_SIZE);
+ sev_decrypt_memory(vm, dst, data, BUFFER_SIZE);
+ validate_dst(0, BUFFER_SIZE, 0x55);
+
+ __test_sev_dbg(vm, PAGE_SIZE);
+
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ __test_sev_dbg(vm, sizes[i]);
+ __test_sev_dbg(vm, PAGE_SIZE - sizes[i]);
+ __test_sev_dbg(vm, PAGE_SIZE + sizes[i]);
+ __test_sev_dbg(vm, BUFFER_SIZE - sizes[i]);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ /* Note, KVM doesn't support {de,en}crypt commands for SNP. */
+ test_sev_dbg(KVM_X86_SEV_VM, 0);
+ test_sev_dbg(KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 8eeba2327c7c..8db88c355f16 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -136,16 +136,14 @@ int main(int argc, char *argv[])
kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
- have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+ have_sev_es = kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM);
- TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
- "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
- kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+ TEST_ASSERT(!have_sev_es || kvm_cpu_has(X86_FEATURE_SEV_ES),
+ "sev-es: SEV_ES_VM supported without SEV_ES in CPUID");
- have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
- TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
- "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not",
- kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+ have_snp = kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM);
+ TEST_ASSERT(!have_snp || kvm_cpu_has(X86_FEATURE_SEV_SNP),
+ "sev-snp: SNP_VM supported without SEV_SNP in CPUID");
test_vm_types();
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
index 6b0928e69051..42bc023d5193 100644
--- a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
@@ -374,7 +374,7 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
- have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+ have_sev_es = kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM);
if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
test_sev_migrate_from(/* es= */ false);
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index 1a49ee391586..6b2cbe2a90b7 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -249,10 +249,10 @@ int main(int argc, char *argv[])
test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
- if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM))
test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
- if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM))
test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
return 0;
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_pat_test.c b/tools/testing/selftests/kvm/x86/svm_nested_pat_test.c
new file mode 100644
index 000000000000..92da8ff34da1
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_pat_test.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ *
+ * Test that KVM correctly virtualizes the PAT MSR and VMCB g_pat field
+ * for nested SVM guests:
+ *
+ * o With nested NPT disabled:
+ * - L1 and L2 share the same PAT
+ * - The vmcb12.g_pat is ignored
+ * o With nested NPT enabled:
+ * - Invalid g_pat in vmcb12 should cause VMEXIT_INVALID
+ * - L2 should see vmcb12.g_pat via RDMSR, not L1's PAT
+ * - L2's writes to PAT should be saved to vmcb12 on exit
+ * - L1's PAT should be restored after #VMEXIT from L2
+ * - State save/restore should preserve both L1's and L2's PAT values
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+#define PAT_DEFAULT 0x0007040600070406ULL
+#define L1_PAT_VALUE 0x0007040600070404ULL /* Change PA0 to WT */
+#define L2_VMCB12_PAT 0x0606060606060606ULL /* All WB */
+#define L2_PAT_MODIFIED 0x0606060606060604ULL /* Change PA0 to WT */
+#define INVALID_PAT_VALUE 0x0808080808080808ULL /* 8 is reserved */
+
+bool npt_enabled;
+int nr_iterations;
+
+static void l2_guest_code(void)
+{
+ u64 expected_pat = npt_enabled ? L2_VMCB12_PAT : L1_PAT_VALUE;
+ int i;
+
+ for (i = 0; i < nr_iterations; i++) {
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), expected_pat);
+ GUEST_SYNC(1);
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), expected_pat);
+
+ wrmsr(MSR_IA32_CR_PAT, L2_PAT_MODIFIED);
+ expected_pat = L2_PAT_MODIFIED;
+
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), L2_PAT_MODIFIED);
+ GUEST_SYNC(2);
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), L2_PAT_MODIFIED);
+
+ vmmcall();
+ }
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ int i;
+
+ wrmsr(MSR_IA32_CR_PAT, L1_PAT_VALUE);
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), L1_PAT_VALUE);
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->save.g_pat = L2_VMCB12_PAT;
+ vmcb->control.intercept &= ~(1ULL << INTERCEPT_MSR_PROT);
+
+ for (i = 0; i < nr_iterations; i++) {
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ GUEST_ASSERT_EQ(vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ /*
+ * If NPT is enabled by L1, L2 has a unique PAT and L1's PAT is
+ * unchanged. Otherwise, PAT is shared between L1 and L2.
+ */
+ if (npt_enabled) {
+ GUEST_ASSERT_EQ(vmcb->save.g_pat, L2_PAT_MODIFIED);
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), L1_PAT_VALUE);
+ } else {
+ GUEST_ASSERT_EQ(rdmsr(MSR_IA32_CR_PAT), L2_PAT_MODIFIED);
+ }
+ vmcb->save.rip += 3; /* skip over VMMCALL */
+ }
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code_invalid_gpat(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ /* VMRUN should fail without running L2 */
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->save.g_pat = INVALID_PAT_VALUE;
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ GUEST_ASSERT_EQ(vmcb->control.exit_code, SVM_EXIT_ERR);
+ GUEST_DONE();
+}
+
+static void run_test(void *guest_code, bool do_save_restore, int nr_iters)
+{
+ struct kvm_x86_state *state;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ gva_t svm_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT);
+
+ if (npt_enabled)
+ vm_enable_npt(vm);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ if (npt_enabled)
+ tdp_identity_map_default_memslots(vm);
+
+ vcpu_args_set(vcpu, 1, svm_gva);
+
+ nr_iterations = nr_iters;
+ sync_global_to_guest(vm, npt_enabled);
+ sync_global_to_guest(vm, nr_iterations);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ if (do_save_restore) {
+ state = vcpu_save_state(vcpu);
+ kvm_vm_release(vm);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ break;
+ case UCALL_DONE:
+ kvm_vm_free(vm);
+ return;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+#define gpat_test(test_name, guest_code, npt_setting) \
+do { \
+ npt_setting; \
+ \
+ if (npt_enabled && !kvm_cpu_has(X86_FEATURE_NPT)) { \
+ pr_info("Skipping: " test_name " (no NPT support)\n"); \
+ break; \
+ } \
+ \
+ pr_info("Testing: " test_name "\n"); \
+ run_test(guest_code, false, 1); \
+ \
+ if (guest_code == l1_guest_code) { \
+ pr_info("Testing: " test_name " Save/Restore\n"); \
+ run_test(guest_code, true, 1); \
+ \
+ pr_info("Testing: " test_name " Multiple VMRUNs\n"); \
+ run_test(guest_code, false, 10); \
+ } \
+} while (0)
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
+ KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT);
+
+ gpat_test("Invalid gPAT", l1_guest_code_invalid_gpat, npt_enabled = true);
+ gpat_test("Nested NPT enabled", l1_guest_code, npt_enabled = true);
+ gpat_test("Nested NPT disabled", l1_guest_code, npt_enabled = false);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
index e0c52321f87c..5b0c2359bbb4 100644
--- a/tools/testing/selftests/kvm/x86/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c
@@ -255,7 +255,6 @@ KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
struct kvm_regs regs;
/* Request and verify all valid register sets. */
- /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index 834005b2b0f0..f45fdef35681 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -45,17 +45,25 @@ struct audit_message {
};
};
-static const struct timeval audit_tv_dom_drop = {
+static const struct timeval audit_tv_default = {
/*
- * Because domain deallocation is tied to asynchronous credential
- * freeing, receiving such event may take some time. In practice,
- * on a small VM, it should not exceed 100k usec, but let's wait up
- * to 1 second to be safe.
+ * Default socket timeout for audit_match_record() callers that expect a
+ * record to arrive. Asynchronous kauditd delivery can exceed 1 usec
+ * under heavy debug configs (KASAN, lockdep), where kauditd_thread
+ * scheduling between audit_log_end() and netlink_unicast() takes longer
+ * than the previous 1 usec timeout. 1 second is a generous ceiling: on
+ * the happy path, kauditd delivers within dozens of usec.
*/
.tv_sec = 1,
};
-static const struct timeval audit_tv_default = {
+static const struct timeval audit_tv_fast = {
+ /*
+ * Fast timeout for paths that expect no record (audit_init() drain,
+ * audit_count_records(), probes). Causes audit_recv() to return
+ * -EAGAIN once the socket buffer is empty, naturally terminating the
+ * read loop.
+ */
.tv_usec = 1,
};
@@ -334,8 +342,13 @@ static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
* Matches a domain deallocation record. When expected_domain_id is non-zero,
* the pattern includes the specific domain ID so that stale deallocation
* records from a previous test (with a different domain ID) are skipped by
- * audit_match_record(), and the socket timeout is temporarily increased to
- * audit_tv_dom_drop to wait for the asynchronous kworker deallocation.
+ * audit_match_record(), waiting for the asynchronous kworker deallocation with
+ * the default patient timeout.
+ *
+ * When expected_domain_id is zero, the caller is probing for any dealloc record
+ * that may or may not arrive. Temporarily lowers the socket timeout to
+ * audit_tv_fast for this probe so it returns promptly when no record is
+ * pending; restores audit_tv_default after.
*/
static int __maybe_unused
matches_log_domain_deallocated(int audit_fd, unsigned int num_denials,
@@ -361,16 +374,21 @@ matches_log_domain_deallocated(int audit_fd, unsigned int num_denials,
if (log_match_len >= sizeof(log_match))
return -E2BIG;
- if (expected_domain_id)
- setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO,
- &audit_tv_dom_drop, sizeof(audit_tv_dom_drop));
+ if (!expected_domain_id) {
+ if (setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_fast, sizeof(audit_tv_fast)))
+ return -errno;
+ }
err = audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
domain_id);
- if (expected_domain_id)
- setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
- sizeof(audit_tv_default));
+ if (!expected_domain_id) {
+ if (setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)) &&
+ !err)
+ err = -errno;
+ }
return err;
}
@@ -381,30 +399,46 @@ struct audit_records {
};
/*
- * WARNING: Do not assert records.domain == 0 without a preceding
- * audit_match_record() call. Domain deallocation records are emitted
- * asynchronously from kworker threads and can arrive after the drain in
- * audit_init(), corrupting the domain count. A preceding audit_match_record()
- * call consumes stale records while scanning, making the assertion safe in
- * practice because stale deallocation records arrive before the expected access
- * records.
+ * Counts remaining audit records by type, skipping domain deallocation records.
+ * Deallocation records are emitted asynchronously from kworker threads after a
+ * previous test's child has exited, so they can arrive after the drain in
+ * audit_init() and after the preceding audit_match_record() call. Allocation
+ * records are emitted synchronously during landlock_log_denial() in the current
+ * test's syscall context, so only those are counted in records->domain.
+ *
+ * Temporarily lowers SO_RCVTIMEO to audit_tv_fast for the read loop: this is a
+ * "no record expected" path that should terminate on the first -EAGAIN. The
+ * default patient timeout is restored on exit for subsequent
+ * audit_match_record() callers.
*/
static int audit_count_records(int audit_fd, struct audit_records *records)
{
+ static const char dealloc_pattern[] = REGEX_LANDLOCK_PREFIX
+ " status=deallocated ";
struct audit_message msg;
- int err;
+ regex_t dealloc_re;
+ int ret, err = 0;
+
+ ret = regcomp(&dealloc_re, dealloc_pattern, 0);
+ if (ret)
+ return -ENOMEM;
records->access = 0;
records->domain = 0;
+ if (setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_fast,
+ sizeof(audit_tv_fast))) {
+ err = -errno;
+ goto out;
+ }
+
do {
memset(&msg, 0, sizeof(msg));
err = audit_recv(audit_fd, &msg);
if (err) {
if (err == -EAGAIN)
- return 0;
- else
- return err;
+ err = 0;
+ break;
}
switch (msg.header.nlmsg_type) {
@@ -412,12 +446,24 @@ static int audit_count_records(int audit_fd, struct audit_records *records)
records->access++;
break;
case AUDIT_LANDLOCK_DOMAIN:
- records->domain++;
+ ret = regexec(&dealloc_re, msg.data, 0, NULL, 0);
+ if (ret == REG_NOMATCH) {
+ records->domain++;
+ } else if (ret != 0) {
+ err = -EIO;
+ goto out;
+ }
break;
}
} while (true);
- return 0;
+out:
+ if (setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
+ sizeof(audit_tv_default)) &&
+ !err)
+ err = -errno;
+ regfree(&dealloc_re);
+ return err;
}
static int audit_init(void)
@@ -436,9 +482,9 @@ static int audit_init(void)
if (err)
goto err_close;
- /* Sets a timeout for negative tests. */
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
- sizeof(audit_tv_default));
+ /* Uses the fast timeout to drain stale records below. */
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_fast,
+ sizeof(audit_tv_fast));
if (err) {
err = -errno;
goto err_close;
@@ -454,6 +500,19 @@ static int audit_init(void)
while (audit_recv(fd, NULL) == 0)
;
+ /*
+ * Restores the default timeout for audit_match_record() callers that
+ * expect a record to arrive. Paths that expect no record restore the
+ * fast timeout locally (audit_count_records(), the expected_domain_id
+ * == 0 probe in matches_log_domain_deallocated()).
+ */
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
+ sizeof(audit_tv_default));
+ if (err) {
+ err = -errno;
+ goto err_close;
+ }
+
return fd;
err_close:
@@ -494,10 +553,9 @@ static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
static int audit_cleanup(int audit_fd, struct audit_filter *filter)
{
struct audit_filter new_filter;
+ int err = 0;
if (audit_fd < 0 || !filter) {
- int err;
-
/*
* Simulates audit_init_with_exe_filter() when called from
* FIXTURE_TEARDOWN_PARENT().
@@ -508,23 +566,19 @@ static int audit_cleanup(int audit_fd, struct audit_filter *filter)
filter = &new_filter;
err = audit_init_filter_exe(filter, NULL);
- if (err) {
- close(audit_fd);
- return err;
- }
+ if (err)
+ goto err_close;
}
/* Filters might not be in place. */
audit_filter_exe(audit_fd, filter, AUDIT_DEL_RULE);
audit_filter_drop(audit_fd, AUDIT_DEL_RULE);
- /*
- * Because audit_cleanup() might not be called by the test auditd
- * process, it might not be possible to explicitly set it. Anyway,
- * AUDIT_STATUS_ENABLED will implicitly be set to 0 when the auditd
- * process will exit.
- */
- return close(audit_fd);
+ err = audit_set_status(audit_fd, AUDIT_STATUS_ENABLED, 0);
+
+err_close:
+ close(audit_fd);
+ return err;
}
static int audit_init_with_exe_filter(struct audit_filter *filter)
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index 93ae5bd0dcce..72b5612375dd 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -76,7 +76,7 @@ TEST_F(audit, layers)
.scoped = LANDLOCK_SCOPE_SIGNAL,
};
int status, ruleset_fd, i;
- __u64(*domain_stack)[16];
+ __u64(*domain_stack)[LANDLOCK_MAX_NUM_LAYERS];
__u64 prev_dom = 3;
pid_t child;
@@ -607,30 +607,42 @@ FIXTURE(audit_flags)
FIXTURE_VARIANT(audit_flags)
{
const int restrict_flags;
+ const __u64 quiet_scoped;
};
/* clang-format off */
FIXTURE_VARIANT_ADD(audit_flags, default) {
/* clang-format on */
.restrict_flags = 0,
+ .quiet_scoped = 0,
};
/* clang-format off */
FIXTURE_VARIANT_ADD(audit_flags, same_exec_off) {
/* clang-format on */
.restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+ .quiet_scoped = 0,
};
/* clang-format off */
FIXTURE_VARIANT_ADD(audit_flags, subdomains_off) {
/* clang-format on */
.restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+ .quiet_scoped = 0,
};
/* clang-format off */
FIXTURE_VARIANT_ADD(audit_flags, cross_exec_on) {
/* clang-format on */
.restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+ .quiet_scoped = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, signal_quieted) {
+ /* clang-format on */
+ .restrict_flags = 0,
+ .quiet_scoped = LANDLOCK_SCOPE_SIGNAL,
};
FIXTURE_SETUP(audit_flags)
@@ -674,12 +686,16 @@ TEST_F(audit_flags, signal)
pid_t child;
struct audit_records records;
__u64 deallocated_dom = 2;
+ bool expect_audit = !(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) &&
+ !(variant->quiet_scoped & LANDLOCK_SCOPE_SIGNAL);
child = fork();
ASSERT_LE(0, child);
if (child == 0) {
const struct landlock_ruleset_attr ruleset_attr = {
.scoped = LANDLOCK_SCOPE_SIGNAL,
+ .quiet_scoped = variant->quiet_scoped,
};
int ruleset_fd;
@@ -696,8 +712,7 @@ TEST_F(audit_flags, signal)
EXPECT_EQ(-1, kill(getppid(), 0));
EXPECT_EQ(EPERM, errno);
- if (variant->restrict_flags &
- LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ if (!expect_audit) {
EXPECT_EQ(-EAGAIN, matches_log_signal(
_metadata, self->audit_fd,
getppid(), self->domain_id));
@@ -724,12 +739,12 @@ TEST_F(audit_flags, signal)
/* Makes sure there is no superfluous logged records. */
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
- if (variant->restrict_flags &
- LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ if (!expect_audit) {
EXPECT_EQ(0, records.access);
} else {
EXPECT_EQ(1, records.access);
}
+ EXPECT_EQ(0, records.domain);
/* Updates filter rules to match the drop record. */
set_cap(_metadata, CAP_AUDIT_CONTROL);
@@ -748,8 +763,7 @@ TEST_F(audit_flags, signal)
WEXITSTATUS(status) != EXIT_SUCCESS)
_metadata->exit_code = KSFT_FAIL;
- if (variant->restrict_flags &
- LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ if (!expect_audit) {
/*
* No deallocation record: denials=0 never matches a real
* record.
@@ -849,10 +863,8 @@ FIXTURE_SETUP(audit_exec)
FIXTURE_TEARDOWN(audit_exec)
{
set_cap(_metadata, CAP_AUDIT_CONTROL);
- EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
- AUDIT_DEL_RULE));
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
clear_cap(_metadata, CAP_AUDIT_CONTROL);
- EXPECT_EQ(0, close(self->audit_fd));
}
TEST_F(audit_exec, signal_and_open)
@@ -917,6 +929,7 @@ TEST_F(audit_exec, signal_and_open)
/* Tests that there was no denial until now. */
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
/*
* Wait for the child to do a first denied action by layer1 and
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 30d37234086c..cbd3c1669951 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -76,8 +76,8 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(9, landlock_create_ruleset(NULL, 0,
- LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(10, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
LANDLOCK_CREATE_RULESET_VERSION));
@@ -201,7 +201,7 @@ TEST(add_rule_checks_ordering)
ASSERT_LE(0, ruleset_fd);
/* Checks invalid flags. */
- ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 100));
ASSERT_EQ(EINVAL, errno);
/* Checks invalid ruleset FD. */
@@ -526,4 +526,120 @@ TEST(cred_transfer)
EXPECT_EQ(EACCES, errno);
}
+TEST(useless_quiet_rule_fs)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ .quiet_access_fs = 0,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ int ruleset_fd, root_fd;
+
+ drop_caps(_metadata);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ root_fd = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, root_fd);
+ path_beneath_attr.parent_fd = root_fd;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr,
+ LANDLOCK_ADD_RULE_QUIET));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Check that the rule had not been added. */
+ ASSERT_EQ(0, close(root_fd));
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST(useless_quiet_rule_net)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .quiet_access_net = 0,
+ };
+ struct landlock_net_port_attr net_port_attr = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = 1024,
+ };
+ int ruleset_fd;
+
+ drop_caps(_metadata);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(-1,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port_attr, LANDLOCK_ADD_RULE_QUIET));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST(invalid_quiet_bits_1)
+{
+ const struct landlock_ruleset_attr ruleset_attr_fs = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ .quiet_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ };
+ const struct landlock_ruleset_attr ruleset_attr_net = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .quiet_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_ruleset_attr ruleset_attr_scoped = {
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+ .quiet_scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+
+ /* Quiet bit set but not part of the handled mask. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr_fs,
+ sizeof(ruleset_attr_fs), 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr_net,
+ sizeof(ruleset_attr_net), 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr_scoped,
+ sizeof(ruleset_attr_scoped), 0));
+ ASSERT_EQ(EINVAL, errno);
+}
+
+TEST(invalid_quiet_bits_2)
+{
+ const struct landlock_ruleset_attr ruleset_attr_fs = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ .quiet_access_fs = 1ULL << 63,
+ };
+ const struct landlock_ruleset_attr ruleset_attr_net = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .quiet_access_net = 1ULL << 63,
+ };
+ const struct landlock_ruleset_attr ruleset_attr_scoped = {
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+ .quiet_scoped = 1ULL << 63,
+ };
+
+ /* Quiet bit outside of the valid access range. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr_fs,
+ sizeof(ruleset_attr_fs), 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr_net,
+ sizeof(ruleset_attr_net), 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr_scoped,
+ sizeof(ruleset_attr_scoped), 0));
+ ASSERT_EQ(EINVAL, errno);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 90551650299c..7206d5105d66 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -25,6 +25,8 @@
/* TEST_F_FORK() should not be used for new tests. */
#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
+#define LANDLOCK_MAX_NUM_LAYERS 16
+
static const char bin_sandbox_and_launch[] = "./sandbox-and-launch";
static const char bin_wait_pipe[] = "./wait-pipe";
static const char bin_wait_pipe_sandbox[] = "./wait-pipe-sandbox";
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index cdb47fc1fc0a..86e08aa6e0a7 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -720,7 +720,7 @@ TEST_F_FORK(layout1, rule_with_unhandled_access)
static void add_path_beneath(struct __test_metadata *const _metadata,
const int ruleset_fd, const __u64 allowed_access,
- const char *const path)
+ const char *const path, __u32 flags)
{
struct landlock_path_beneath_attr path_beneath = {
.allowed_access = allowed_access,
@@ -733,7 +733,7 @@ static void add_path_beneath(struct __test_metadata *const _metadata,
strerror(errno));
}
ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
- &path_beneath, 0))
+ &path_beneath, flags))
{
TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
strerror(errno));
@@ -780,7 +780,7 @@ static int create_ruleset(struct __test_metadata *const _metadata,
continue;
add_path_beneath(_metadata, ruleset_fd, rules[i].access,
- rules[i].path);
+ rules[i].path, 0);
}
return ruleset_fd;
}
@@ -1310,7 +1310,7 @@ TEST_F_FORK(layout1, inherit_subset)
* ANDed with the previous ones.
*/
add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
- dir_s1d2);
+ dir_s1d2, 0);
/*
* According to ruleset_fd, dir_s1d2 should now have the
* LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
@@ -1342,7 +1342,7 @@ TEST_F_FORK(layout1, inherit_subset)
* Try to get more privileges by adding new access rights to the parent
* directory: dir_s1d1.
*/
- add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
+ add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1, 0);
enforce_ruleset(_metadata, ruleset_fd);
/* Same tests and results as above. */
@@ -1365,7 +1365,7 @@ TEST_F_FORK(layout1, inherit_subset)
* that there was no rule tied to it before.
*/
add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
- dir_s1d3);
+ dir_s1d3, 0);
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
@@ -1417,7 +1417,7 @@ TEST_F_FORK(layout1, inherit_superset)
add_path_beneath(_metadata, ruleset_fd,
LANDLOCK_ACCESS_FS_READ_FILE |
LANDLOCK_ACCESS_FS_READ_DIR,
- dir_s1d2);
+ dir_s1d2, 0);
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -1441,7 +1441,7 @@ TEST_F_FORK(layout0, max_layers)
};
const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
- for (i = 0; i < 16; i++)
+ for (i = 0; i < LANDLOCK_MAX_NUM_LAYERS; i++)
enforce_ruleset(_metadata, ruleset_fd);
for (i = 0; i < 2; i++) {
@@ -3970,7 +3970,7 @@ static int ioctl_error(struct __test_metadata *const _metadata, int fd,
unsigned int cmd)
{
char buf[128]; /* sufficiently large */
- int res, stdinbak_fd;
+ int res, stdinbak_fd, err;
/*
* Depending on the IOCTL command, parts of the zeroed-out buffer might
@@ -3985,13 +3985,14 @@ static int ioctl_error(struct __test_metadata *const _metadata, int fd,
/* Invokes the IOCTL with a zeroed-out buffer. */
bzero(&buf, sizeof(buf));
res = ioctl(fd, cmd, &buf);
+ err = errno;
/* Restores the old FD 0 and closes the backup FD. */
ASSERT_EQ(0, dup2(stdinbak_fd, 0));
ASSERT_EQ(0, close(stdinbak_fd));
if (res < 0)
- return errno;
+ return err;
return 0;
}
@@ -4789,6 +4790,7 @@ FIXTURE(layout1_bind) {};
static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
+static const char bind_file2_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f2";
/* Move targets for disconnected path tests. */
static const char dir_s4d1[] = TMP_DIR "/s4d1";
@@ -7764,4 +7766,2427 @@ TEST_F(audit_layout1, mount)
EXPECT_EQ(1, records.domain);
}
+static bool debug_quiet_tests;
+
+FIXTURE(audit_quiet_layout1)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit_quiet_layout1)
+{
+ prepare_layout(_metadata);
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ if (getenv("DEBUG_QUIET_TESTS"))
+ debug_quiet_tests = true;
+}
+
+FIXTURE_TEARDOWN_PARENT(audit_quiet_layout1)
+{
+ remove_layout1(_metadata);
+ cleanup_layout(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+struct a_rule {
+ const char *path;
+ __u64 access;
+ bool quiet;
+};
+
+struct a_layer {
+ __u64 handled_access_fs;
+ __u64 quiet_access_fs;
+ struct a_rule rules[6];
+ __u64 restrict_flags;
+};
+
+struct a_target {
+ /* File/dir to try open. */
+ const char *target;
+ /* Open mode (one of O_RDONLY, O_WRONLY, or O_RDWR). */
+ int open_mode;
+ /* Should open succeed? */
+ bool expect_open_success;
+ /* If open fails, whether to expect an audit log for read. */
+ bool audit_read_blocked;
+ /* If open fails, whether to expect an audit log for write. */
+ bool audit_write_blocked;
+ /* If ftruncate() is expected to be allowed. */
+ bool expect_truncate_success;
+ /* If ftruncate fails, whether to expect an audit log. */
+ bool audit_truncate;
+ /*
+ * If ioctl() is expected to be allowed (ioctl not attempted if neither
+ * this nor expect_ioctl_denied is set).
+ */
+ bool expect_ioctl_allowed;
+ /* If ioctl() is expected to be denied. */
+ bool expect_ioctl_denied;
+ /* If ioctl fails, whether to expect an audit log. */
+ bool audit_ioctl;
+};
+
+#define AUDIT_QUIET_MAX_TARGETS 10
+
+FIXTURE_VARIANT(audit_quiet_layout1)
+{
+ struct a_layer layers[3];
+ struct a_target targets[AUDIT_QUIET_MAX_TARGETS];
+};
+
+#define FS_R LANDLOCK_ACCESS_FS_READ_FILE
+#define FS_W LANDLOCK_ACCESS_FS_WRITE_FILE
+#define FS_TRUNC LANDLOCK_ACCESS_FS_TRUNCATE
+#define FS_IOCTL LANDLOCK_ACCESS_FS_IOCTL_DEV
+
+static int sprint_access_bits(char *buf, size_t buflen, __u64 access)
+{
+ size_t offset = 0;
+
+ if (buflen < strlen("rwti make_reg remove_file refer") + 1)
+ abort();
+
+ buf[0] = '\0';
+ if (access & FS_R)
+ offset += snprintf(buf + offset, buflen - offset, "r");
+ if (access & FS_W)
+ offset += snprintf(buf + offset, buflen - offset, "w");
+ if (access & FS_TRUNC)
+ offset += snprintf(buf + offset, buflen - offset, "t");
+ if (access & FS_IOCTL)
+ offset += snprintf(buf + offset, buflen - offset, "i");
+ if (access & LANDLOCK_ACCESS_FS_MAKE_REG)
+ offset += snprintf(buf + offset, buflen - offset, ",make_reg");
+ if (access & LANDLOCK_ACCESS_FS_REMOVE_FILE)
+ offset +=
+ snprintf(buf + offset, buflen - offset, ",remove_file");
+ if (access & LANDLOCK_ACCESS_FS_REFER)
+ offset += snprintf(buf + offset, buflen - offset, ",refer");
+
+ if (buf[0] == ',') {
+ offset--;
+ memmove(buf, buf + 1, offset);
+ buf[offset] = '\0';
+ }
+
+ return offset;
+}
+
+static int apply_a_layer(struct __test_metadata *const _metadata,
+ const struct a_layer *l)
+{
+ struct landlock_ruleset_attr rs_attr = {
+ .handled_access_fs = l->handled_access_fs,
+ .quiet_access_fs = l->quiet_access_fs,
+ };
+ int rs_fd;
+ int i;
+ const struct a_rule *r;
+ char handled_access_s[33], quiet_access_s[33], rule_access_s[33];
+
+ if (!l->handled_access_fs)
+ return 0;
+
+ rs_fd = landlock_create_ruleset(&rs_attr, sizeof(rs_attr), 0);
+ ASSERT_LE(0, rs_fd);
+
+ for (i = 0; i < ARRAY_SIZE(l->rules); i++) {
+ r = &l->rules[i];
+ if (!r->path)
+ continue;
+
+ add_path_beneath(_metadata, rs_fd, r->access, r->path,
+ r->quiet ? LANDLOCK_ADD_RULE_QUIET : 0);
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(rs_fd, l->restrict_flags))
+ {
+ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+ }
+ ASSERT_EQ(0, close(rs_fd));
+
+ if (debug_quiet_tests) {
+ sprint_access_bits(handled_access_s, sizeof(handled_access_s),
+ l->handled_access_fs);
+ sprint_access_bits(quiet_access_s, sizeof(quiet_access_s),
+ l->quiet_access_fs);
+ TH_LOG("applied layer: handled=%s quiet=%s restrict_flags=0x%llx",
+ handled_access_s, quiet_access_s,
+ (unsigned long long)l->restrict_flags);
+ for (i = 0; i < ARRAY_SIZE(l->rules); i++) {
+ r = &l->rules[i];
+ if (!r->path)
+ continue;
+
+ sprint_access_bits(rule_access_s, sizeof(rule_access_s),
+ r->access);
+ TH_LOG(" rule[%d]: path=%s access=%s quiet=%d", i,
+ r->path, rule_access_s, r->quiet);
+ }
+ }
+ return 0;
+}
+
+void audit_quiet_layout1_test_body(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(audit_quiet_layout1) * self,
+ const struct a_target *targets)
+{
+ struct audit_records records = {};
+ int i;
+ const struct a_target *target;
+ int fd = -1;
+ int open_mode;
+ int ret;
+ bool expect_audit;
+ const char *blocker;
+
+ for (i = 0; i < AUDIT_QUIET_MAX_TARGETS; i++) {
+ target = &targets[i];
+ if (!target->target)
+ continue;
+
+ open_mode = target->open_mode & (O_RDONLY | O_WRONLY | O_RDWR);
+
+ EXPECT_TRUE(open_mode == O_RDONLY || open_mode == O_WRONLY ||
+ open_mode == O_RDWR);
+
+ if (target->expect_open_success) {
+ EXPECT_FALSE(target->audit_read_blocked);
+ EXPECT_FALSE(target->audit_write_blocked);
+ }
+ if (target->expect_truncate_success)
+ EXPECT_TRUE(target->expect_open_success &&
+ !target->audit_truncate);
+
+ if (debug_quiet_tests)
+ TH_LOG("Try open \"%s\" with %s%s", target->target,
+ open_mode != O_WRONLY ? "r" : "",
+ open_mode != O_RDONLY ? "w" : "");
+
+ fd = openat(AT_FDCWD, target->target, open_mode | O_CLOEXEC);
+ if (target->expect_open_success) {
+ ASSERT_LE(0, fd)
+ {
+ TH_LOG("Failed to open \"%s\": %s",
+ target->target, strerror(errno));
+ };
+ } else {
+ ASSERT_EQ(-1, fd);
+ ASSERT_EQ(EACCES, errno);
+ }
+
+ expect_audit = true;
+
+ if (target->audit_read_blocked && target->audit_write_blocked)
+ blocker = "fs\\.write_file,fs\\.read_file";
+ else if (target->audit_read_blocked)
+ blocker = "fs\\.read_file";
+ else if (target->audit_write_blocked)
+ blocker = "fs\\.write_file";
+ else
+ expect_audit = false;
+
+ if (expect_audit)
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ blocker, target->target));
+
+ /* Check that we see no (other) logs. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+
+ if (target->expect_open_success && fd >= 0) {
+ if (debug_quiet_tests)
+ TH_LOG("Try ftruncate \"%s\"", target->target);
+
+ ret = ftruncate(fd, 0);
+ if (target->expect_truncate_success) {
+ ASSERT_EQ(0, ret);
+ } else {
+ ASSERT_EQ(-1, ret);
+ if (open_mode != O_RDONLY)
+ ASSERT_EQ(EACCES, errno);
+ }
+
+ if (target->audit_truncate)
+ ASSERT_EQ(0, matches_log_fs(_metadata,
+ self->audit_fd,
+ "fs\\.truncate",
+ target->target));
+
+ if (target->expect_ioctl_allowed ||
+ target->expect_ioctl_denied) {
+ if (debug_quiet_tests)
+ TH_LOG("Try ioctl FIONREAD on \"%s\"",
+ target->target);
+
+ ret = ioctl_error(_metadata, fd, FIONREAD);
+ if (target->expect_ioctl_allowed) {
+ ASSERT_NE(EACCES, ret);
+ } else {
+ ASSERT_EQ(EACCES, ret);
+ }
+ }
+
+ if (target->audit_ioctl)
+ ASSERT_EQ(0, matches_log_fs_extra(
+ _metadata, self->audit_fd,
+ "fs\\.ioctl_dev",
+ target->target,
+ " ioctlcmd=0x541b\\+"));
+
+ /* Check that we see no other logs. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd,
+ &records));
+ ASSERT_EQ(0, records.access);
+ ASSERT_EQ(0, close(fd));
+ }
+ }
+}
+
+TEST_F(audit_quiet_layout1, base)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++)
+ ASSERT_EQ(0, apply_a_layer(_metadata, &variant->layers[i]));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant->targets);
+}
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_simple) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ { .path = dir_s1d1, .access = 0, .quiet = true },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ },
+ /* Not covered by quiet */
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ /* Access not quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .audit_write_blocked = true,
+ },
+ /*
+ * Quiet flag only takes effect if all blocked access bits are
+ * quieted, otherwise audit log emitted as normal (with all
+ * blockers)
+ */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ .audit_write_blocked = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_allow_read) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_W,
+ .rules = {
+ { .path = dir_s1d1, .access = FS_R, .quiet = true },
+ /* Quiet flags inherit down and are not overridden */
+ { .path = file1_s1d1, .access = FS_R, .quiet = false },
+ { .path = file1_s2d3, .access = 0, .quiet = true },
+ },
+ },
+ },
+ .targets = {
+ /* Read ok */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ },
+ /* Write quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ },
+ /* Read allowed, write quieted so no audit */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ },
+ /* Not covered by quiet */
+ {
+ .target = file1_s2d2,
+ .open_mode = O_WRONLY,
+ .audit_write_blocked = true,
+ },
+ {
+ .target = file1_s2d2,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ .audit_write_blocked = true,
+ },
+ /* Single file quiet */
+ {
+ .target = file1_s2d3,
+ .open_mode = O_WRONLY,
+ },
+ /* Wrong file */
+ {
+ .target = file2_s2d3,
+ .open_mode = O_WRONLY,
+ .audit_write_blocked = true,
+ },
+ /* Access not quieted */
+ {
+ .target = file1_s2d3,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ /* Some access not quieted */
+ {
+ .target = file1_s2d3,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ .audit_write_blocked = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_allow_write) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ { .path = dir_s1d1, .access = FS_W, .quiet = true },
+ },
+ },
+ },
+ .targets = {
+ /* Read quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ },
+ /* Truncate not quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ /* Not covered by quiet */
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ /* Write allowed, read quieted so no audit */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, allow_write_quiet_trunc) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_TRUNC,
+ .rules = {
+ { .path = dir_s1d1, .access = FS_W, .quiet = true },
+ { .path = dir_s2d1, .access = FS_W, .quiet = false },
+ },
+ },
+ },
+ .targets = {
+ /* Read not allowed and not quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ /* Truncate quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ },
+ /* Not covered by quiet (truncate) */
+ {
+ .target = file1_s2d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ /* Not covered by quiet (read/write) */
+ {
+ .target = file1_s3d1,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ .audit_write_blocked = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, allow_rw_quiet_trunc) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_TRUNC,
+ .rules = {
+ { .path = dir_s1d1, .access = FS_R | FS_W, .quiet = true },
+ { .path = dir_s2d1, .access = FS_R | FS_W, .quiet = false },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_all) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ { .path = dir_s1d1, .access = 0, .quiet = true },
+ { .path = file1_s2d1, .access = FS_R | FS_W, .quiet = true },
+ { .path = file1_s2d3, .access = 0, .quiet = true },
+ { .path = dir_s3d1, .access = FS_W, .quiet = false },
+ { .path = "/dev/zero", .access = FS_R, .quiet = false },
+ { .path = "/dev/null", .access = FS_R, .quiet = true },
+ },
+ },
+ },
+ .targets = {
+ /* No logs */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ },
+ /* Truncate quieted - no log */
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ },
+ /* Truncate not covered by quiet */
+ {
+ .target = file1_s3d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ /* Not covered by quiet */
+ {
+ .target = file1_s3d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ /* Single file quiet */
+ {
+ .target = file1_s2d3,
+ .open_mode = O_RDWR,
+ },
+ /* Wrong file */
+ {
+ .target = file2_s2d3,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ .audit_write_blocked = true,
+ },
+ /* Ioctl quieted */
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ },
+ /* Ioctl not quieted */
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ .audit_ioctl = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_across_mountpoint) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ { .path = dir_s3d1, .access = 0, .quiet = true },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s3d3,
+ .open_mode = O_RDONLY,
+ },
+ /* Not covered by quiet */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ .audit_write_blocked = true,
+ },
+ /* Access not quieted */
+ {
+ .target = file1_s3d3,
+ .open_mode = O_WRONLY,
+ .audit_write_blocked = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, allow_all_quiet) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = true
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = true
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ .expect_truncate_success = true,
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_allowed = true,
+ },
+ },
+};
+
+/*
+ * With LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF, it doesn't matter what the
+ * quiet flags below the layer say.
+ */
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, subdomains_off) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R,
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+ .rules = {
+ { .path = "/", .access = FS_R, .quiet = false },
+ }
+ },
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ { .path = dir_s1d1, .access = 0, .quiet = true },
+ { .path = file1_s2d2, .access = FS_R | FS_W, .quiet = true },
+ { .path = file1_s2d3, .access = FS_R | FS_W, .quiet = false },
+ { .path = "/dev/null", .access = FS_R | FS_W, .quiet = true },
+ { .path = "/dev/zero", .access = FS_R | FS_W, .quiet = false },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDWR,
+ },
+ {
+ .target = file1_s2d2,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ /* No audit_truncate */
+ },
+ {
+ .target = file1_s2d3,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ /* No audit_truncate */
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ /* No audit_ioctl */
+ },
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ /* No audit_ioctl */
+ },
+ },
+};
+
+/*
+ * With LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, it doesn't matter what the
+ * quiet flags on the layer say.
+ */
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, same_exec_off) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R,
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+ .rules = {
+ { .path = dir_s1d1, .access = 0, .quiet = true },
+ { .path = file1_s2d2, .access = FS_R | FS_W, .quiet = true },
+ { .path = file1_s2d3, .access = FS_R | FS_W, .quiet = false },
+ { .path = "/dev/null", .access = FS_R | FS_W, .quiet = true },
+ { .path = "/dev/zero", .access = FS_R | FS_W, .quiet = false },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDWR,
+ },
+ {
+ .target = file1_s2d2,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ /* No audit_truncate */
+ },
+ {
+ .target = file1_s2d3,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ /* No audit_truncate */
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ /* No audit_ioctl */
+ },
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ /* No audit_ioctl */
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_1) {
+ /* Here, rules that deny access are always quiet. */
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R,
+ .quiet = true,
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = false,
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R,
+ .quiet = true,
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ },
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_2) {
+ /* Here, rules that deny access are never quiet. */
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_W,
+ .quiet = false
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = true
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R,
+ .quiet = false
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = true
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = true
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_W,
+ .quiet = false
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = true
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R,
+ .quiet = false
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ .audit_ioctl = true,
+ },
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ .audit_ioctl = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_3) {
+ /* This time only the second layer quiets things. */
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_W,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = false,
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R,
+ .quiet = true,
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ .audit_ioctl = true,
+ },
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_different_quiet_access) {
+ /* Here, rules that deny access are always quiet. */
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R,
+ .quiet = true,
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = false,
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_IOCTL,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = FS_R | FS_W | FS_TRUNC,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = "/dev/null",
+ .access = FS_R | FS_W | FS_IOCTL,
+ .quiet = false,
+ },
+ {
+ .path = "/dev/zero",
+ .access = FS_R,
+ .quiet = true,
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ },
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ {
+ .target = file1_s2d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .audit_truncate = true,
+ },
+ {
+ .target = "/dev/null",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ },
+ {
+ .target = "/dev/zero",
+ .open_mode = O_RDONLY,
+ .expect_open_success = true,
+ .expect_ioctl_denied = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_different_handled_1) {
+ /* Quiet from layer 1 */
+ .layers = {
+ {
+ .handled_access_fs = FS_R,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = FS_R,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file1_s1d2,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d2,
+ .access = FS_R,
+ .quiet = true,
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_W,
+ .quiet_access_fs = FS_W,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = FS_W,
+ .quiet = false,
+ },
+ /* Nothing for file2_s1d1 */
+ {
+ .path = file1_s1d2,
+ .access = FS_W,
+ .quiet = false,
+ },
+ /* Nothing for file2_s1d2 */
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ .expect_truncate_success = true,
+ },
+ /* Missing both, youngest layer denies write, not quiet */
+ {
+ .target = file2_s1d1,
+ .open_mode = O_RDWR,
+ .audit_write_blocked = true,
+ },
+ /* Missing read, denied and quieted by layer 1 */
+ {
+ .target = file1_s1d2,
+ .open_mode = O_RDWR,
+ },
+ /* Missing write, denied and not quieted by layer 2 */
+ {
+ .target = file2_s1d2,
+ .open_mode = O_RDWR,
+ .audit_write_blocked = true,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_different_handled_2) {
+ /* Quiet from layer 2 */
+ .layers = {
+ {
+ .handled_access_fs = FS_R,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = FS_R,
+ .quiet = false,
+ },
+ /* Nothing for file2_s1d1 and file1_s1d2 */
+ {
+ .path = file2_s1d2,
+ .access = FS_R,
+ .quiet = false,
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_W,
+ .quiet_access_fs = FS_W,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file1_s1d2,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d2,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ .expect_truncate_success = true,
+ },
+ /* Missing both, youngest layer denies write, quiet */
+ {
+ .target = file2_s1d1,
+ .open_mode = O_RDWR,
+ },
+ /* Missing read, denied and not quieted by layer 1 */
+ {
+ .target = file1_s1d2,
+ .open_mode = O_RDWR,
+ .audit_read_blocked = true,
+ },
+ /* Missing write, denied and quieted by layer 2 */
+ {
+ .target = file2_s1d2,
+ .open_mode = O_RDWR,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, quiet_two_layers_different_handled_3) {
+ /* Quiet from both layers */
+ .layers = {
+ {
+ .handled_access_fs = FS_R,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = FS_R,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file1_s1d2,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d2,
+ .access = FS_R,
+ .quiet = true,
+ },
+ },
+ },
+ {
+ .handled_access_fs = FS_W,
+ .quiet_access_fs = FS_W,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file1_s1d2,
+ .access = FS_W,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d2,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ },
+ },
+ .targets = {
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ .expect_open_success = true,
+ .expect_truncate_success = true,
+ },
+ {
+ .target = file2_s1d1,
+ .open_mode = O_RDWR,
+ },
+ {
+ .target = file1_s1d2,
+ .open_mode = O_RDWR,
+ },
+ {
+ .target = file2_s1d2,
+ .open_mode = O_RDWR,
+ },
+ },
+};
+
+FIXTURE_VARIANT_ADD(audit_quiet_layout1, without_quiet_then_with_quiet) {
+ .layers = {
+ {
+ .handled_access_fs = FS_R | FS_W,
+ .quiet_access_fs = FS_R,
+ .rules = {
+ { .path = dir_s1d1, .access = FS_W, .quiet = false },
+ { .path = dir_s1d1, .access = 0, .quiet = true },
+ },
+ },
+ },
+ .targets = {
+ /* Read denied and quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDONLY,
+ },
+ /* Write ok */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_WRONLY,
+ .expect_open_success = true,
+ .expect_truncate_success = true,
+ },
+ /* Write ok, read denied and quieted */
+ {
+ .target = file1_s1d1,
+ .open_mode = O_RDWR,
+ },
+ /* Not covered by quiet */
+ {
+ .target = file1_s2d1,
+ .open_mode = O_RDONLY,
+ .audit_read_blocked = true,
+ },
+ },
+};
+
+/*
+ * The following TEST_F extend the above test cases to test more layers, with
+ * the inserted layers having varying configurations.
+ */
+
+/* Extra allow all layers, quiet or not, does not change any behaviour. */
+TEST_F(audit_quiet_layout1, allow_all_layer)
+{
+ struct a_layer allow_all_layer = {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = 0,
+ .rules = {
+ {
+ .path = "/",
+ .access = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet = false,
+ },
+ },
+ };
+ int i;
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &allow_all_layer));
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++)
+ ASSERT_EQ(0, apply_a_layer(_metadata, &variant->layers[i]));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant->targets);
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &allow_all_layer));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant->targets);
+
+ /*
+ * SELF_LOG flags or quiet bits from inner allowing layers should not
+ * affect behaviour.
+ */
+ allow_all_layer.quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL;
+ allow_all_layer.rules[0].quiet = true;
+ /*
+ * Note: this only works because we're not checking counts of domain
+ * alloc/dealloc logs
+ */
+ allow_all_layer.restrict_flags =
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF;
+ ASSERT_EQ(0, apply_a_layer(_metadata, &allow_all_layer));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant->targets);
+}
+
+/*
+ * Add useless outer layers until we reach the layer limit. Should not change
+ * anything.
+ */
+TEST_F(audit_quiet_layout1, many_outer_layers)
+{
+ struct a_layer useless_layer = {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC,
+ .rules = {
+ { .path = "/", .access = FS_R | FS_W | FS_TRUNC, .quiet = true },
+ },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++) {
+ if (variant->layers[i].handled_access_fs == 0)
+ break;
+ }
+
+ for (; i < LANDLOCK_MAX_NUM_LAYERS; i++)
+ ASSERT_EQ(0, apply_a_layer(_metadata, &useless_layer));
+
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++)
+ ASSERT_EQ(0, apply_a_layer(_metadata, &variant->layers[i]));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant->targets);
+}
+
+/* An inner layer that denies and quiets everything should result in no logs. */
+TEST_F(audit_quiet_layout1, deny_all_quiet_layer)
+{
+ struct a_layer deny_all_layer = {
+ .handled_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .quiet_access_fs = FS_R | FS_W | FS_TRUNC | FS_IOCTL,
+ .rules = {
+ { .path = "/", .access = 0, .quiet = true },
+ },
+ };
+ int i;
+ FIXTURE_VARIANT(audit_quiet_layout1) variant_2 = {};
+
+ /* Any open should fail with no logs. */
+ for (i = 0; i < ARRAY_SIZE(variant->targets); i++) {
+ const struct a_target *target = &variant->targets[i];
+
+ variant_2.targets[i] = (struct a_target){
+ .target = target->target,
+ .open_mode = target->open_mode,
+ /* We denied everything, open should always fail. */
+ .expect_open_success = false,
+ };
+ }
+
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++)
+ ASSERT_EQ(0, apply_a_layer(_metadata, &variant->layers[i]));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &deny_all_layer));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant_2.targets);
+}
+
+/*
+ * An inner layer that denies everything without quiet should produce logs for
+ * all access.
+ */
+TEST_F(audit_quiet_layout1, deny_all_layer)
+{
+ struct a_layer deny_all_layer = {
+ .handled_access_fs = FS_R | FS_W,
+ .quiet_access_fs = FS_R | FS_W,
+ };
+ int i;
+ FIXTURE_VARIANT(audit_quiet_layout1) variant_2 = {};
+ bool test_has_subdomains_off = false;
+
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++) {
+ if (variant->layers[i].restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF) {
+ test_has_subdomains_off = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(variant->targets); i++) {
+ const struct a_target *target = &variant->targets[i];
+
+ variant_2.targets[i] = (struct a_target){
+ .target = target->target,
+ .open_mode = target->open_mode,
+
+ /* We denied everything, open should always fail. */
+ .expect_open_success = false,
+ /* Audit should always happen as long as open request contains read. */
+ .audit_read_blocked = !test_has_subdomains_off &&
+ target->open_mode != O_WRONLY,
+ /* Audit should always happen as long as open request contains write. */
+ .audit_write_blocked = !test_has_subdomains_off &&
+ target->open_mode != O_RDONLY,
+ };
+ }
+
+ for (i = 0; i < ARRAY_SIZE(variant->layers); i++)
+ ASSERT_EQ(0, apply_a_layer(_metadata, &variant->layers[i]));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &deny_all_layer));
+
+ audit_quiet_layout1_test_body(_metadata, self, variant_2.targets);
+}
+
+/* Uses layout1_bind hierarchy */
+FIXTURE(audit_quiet_rename)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit_quiet_rename)
+{
+ prepare_layout(_metadata);
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ if (getenv("DEBUG_QUIET_TESTS"))
+ debug_quiet_tests = true;
+}
+
+FIXTURE_TEARDOWN_PARENT(audit_quiet_rename)
+{
+ remove_layout1(_metadata);
+ cleanup_layout(_metadata);
+
+ /* umount(dir_s2d2)) is handled by namespace lifetime. */
+
+ remove_path(file1_s4d1);
+ remove_path(file2_s4d1);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+static void simple_quiet_rename(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(audit_quiet_rename) *const self,
+ __u64 handled_access, __u64 quiet_access,
+ bool source_allow, bool dest_allow,
+ bool source_quiet, bool dest_quiet,
+ const char *source_blockers,
+ const char *dest_blockers)
+{
+ /* We will move file1_s1d1 to file1_s2d1 */
+ struct a_layer layer = {
+ .handled_access_fs = handled_access,
+ .quiet_access_fs = quiet_access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = source_allow ? handled_access : 0,
+ .quiet = source_quiet,
+ },
+ {
+ .path = dir_s2d1,
+ .access = dest_allow ? handled_access : 0,
+ .quiet = dest_quiet,
+ },
+ },
+ };
+ struct audit_records records = {};
+ int ret, err;
+
+ /* Skip landlock_add_rule for useless rules. */
+ if (!source_allow && !source_quiet)
+ layer.rules[0].path = NULL;
+ if (!dest_allow && !dest_quiet)
+ layer.rules[1].path = NULL;
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+ EXPECT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ if (debug_quiet_tests)
+ TH_LOG("Try renameat \"%s\" to \"%s\"", file1_s1d1, file1_s2d1);
+ ret = renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1);
+ err = errno;
+ if (ret != 0 && debug_quiet_tests) {
+ TH_LOG("renameat error: %s", err == EXDEV ? "EXDEV" :
+ err == EACCES ? "EACCES" :
+ strerror(err));
+ }
+ if (source_allow && dest_allow) {
+ ASSERT_EQ(0, ret);
+ } else {
+ ASSERT_EQ(-1, ret);
+ if (handled_access & (LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE)) {
+ ASSERT_EQ(EACCES, err);
+ } else {
+ ASSERT_EQ(EXDEV, err);
+ }
+
+ if (source_blockers)
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ source_blockers, dir_s1d1));
+ if (dest_blockers)
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ dest_blockers, dir_s2d1));
+ }
+ /*
+ * No other logs. records.domain not checked per reasoning in
+ * audit_quiet_layout1_test_body.
+ */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, rename_ok)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, true, true, false,
+ false, NULL, NULL);
+}
+
+TEST_F(audit_quiet_rename, no_quiet)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, false, false,
+ false, false, "fs\\.remove_file,fs\\.refer",
+ "fs\\.make_reg,fs\\.refer");
+}
+
+TEST_F(audit_quiet_rename, quiet)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, false, false, true,
+ true, NULL, NULL);
+}
+
+TEST_F(audit_quiet_rename, source_no_quiet_dest_quiet)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, false, false,
+ false, true, "fs\\.remove_file,fs\\.refer", NULL);
+}
+
+TEST_F(audit_quiet_rename, source_quiet_dest_no_quiet)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, false, false, true,
+ false, NULL, "fs\\.make_reg,fs\\.refer");
+}
+
+TEST_F(audit_quiet_rename, only_quiet_refer)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, LANDLOCK_ACCESS_FS_REFER,
+ false, false, true, true,
+ "fs\\.remove_file,fs\\.refer",
+ "fs\\.make_reg,fs\\.refer");
+}
+
+TEST_F(audit_quiet_rename, source_allow_dest_quiet)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, true, false, false,
+ true, NULL, NULL);
+}
+
+TEST_F(audit_quiet_rename, source_quiet_dest_allow)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+
+ simple_quiet_rename(_metadata, self, access, access, false, true, true,
+ false, NULL, NULL);
+}
+
+TEST_F(audit_quiet_rename, handle_all_deny_quiet_refer)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = LANDLOCK_ACCESS_FS_REFER,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* No logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, handle_all_deny_not_quiet_refer)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = 0,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ .quiet = false,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s2d1));
+
+ /* No other logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, handle_all_deny_refer_quiet_source_not_quiet_dest)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = LANDLOCK_ACCESS_FS_REFER,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ .quiet = false,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s2d1));
+
+ /* No other logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_same_dir)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_flag_on_file_ignored)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file1_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.refer", dir_s1d1));
+ /* We didn't unlink destination file */
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d1));
+
+ /* No other logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_flag_on_file_ignored_same_dir)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = file1_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = file2_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0,
+ matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg", dir_s1d1));
+
+ /* No other logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, two_layers_different_quiet1)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer1 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = access,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct a_layer layer2 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = LANDLOCK_ACCESS_FS_REFER,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = access,
+ .quiet = false,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer2));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * The youngest denial will be layer 2. Refer is quieted but we are
+ * also missing remove_file on source.
+ */
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.refer", dir_s1d1));
+ /* No other logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, two_layers_different_quiet2)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer1 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = access,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct a_layer layer2 = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_REFER,
+ .quiet_access_fs = LANDLOCK_ACCESS_FS_REFER,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ .quiet = false,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer2));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * The youngest denial will be layer 2, but refer is quieted (and that
+ * layer does not handle any other accesses).
+ */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, two_layers_different_quiet3)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer1 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = access,
+ .quiet = false,
+ },
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct a_layer layer2 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = access,
+ .quiet = false,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer2));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * The youngest denial will be layer 2, in which everything is quieted.
+ */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename,
+ first_layer_quiet_deny_all_second_layer_not_quiet_deny_all)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer1 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct a_layer layer2 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {},
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer2));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.refer", dir_s1d1));
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_reg,fs\\.refer", dir_s2d1));
+ /* No other logs. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename,
+ first_layer_quiet_deny_all_second_layer_dest_not_quiet)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer1 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct a_layer layer2 = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file1_s2d1));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer1));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer2));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Source is quieted but destination is not. */
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_reg,fs\\.refer", dir_s2d1));
+ /* No other logs. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, rename_xchg)
+{
+ struct a_layer layer = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER,
+ .quiet_access_fs = LANDLOCK_ACCESS_FS_MAKE_REG,
+ .rules = { {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER,
+ .quiet = true,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER,
+ .quiet = false,
+ } },
+ };
+ struct audit_records records = {};
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_on_parent_mount)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, bind_file1_s1d3, AT_FDCWD,
+ bind_file2_s1d3));
+ ASSERT_EQ(EACCES, errno);
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_behind_mountpoint_ignored)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s1d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+
+ EXPECT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1, renameat(AT_FDCWD, bind_file1_s1d3, AT_FDCWD,
+ bind_file2_s1d3));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg",
+ bind_dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_on_parent_mount_disconnected)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s2d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+ int bind_s1d3_fd;
+
+ EXPECT_EQ(0, unlink(file2_s1d3));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_DIRECTORY);
+ ASSERT_GE(bind_s1d3_fd, 0);
+
+ /* Make s1d3 disconnected. */
+ create_directory(_metadata, dir_s4d1);
+ ASSERT_EQ(0, renameat(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s4d2));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1,
+ renameat(bind_s1d3_fd, file1_name, bind_s1d3_fd, file2_name));
+ ASSERT_EQ(EACCES, errno);
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
+TEST_F(audit_quiet_rename, quiet_behind_mountpoint_disconnected)
+{
+ __u64 access = LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_REFER;
+ struct a_layer layer = {
+ .handled_access_fs = access,
+ .quiet_access_fs = access,
+ .rules = {
+ {
+ .path = dir_s4d1,
+ .access = 0,
+ .quiet = true,
+ },
+ },
+ };
+ struct audit_records records = {};
+ int bind_s1d3_fd;
+
+ EXPECT_EQ(0, unlink(file2_s1d3));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_DIRECTORY);
+ ASSERT_GE(bind_s1d3_fd, 0);
+
+ /* Make s1d3 disconnected. */
+ create_directory(_metadata, dir_s4d1);
+ ASSERT_EQ(0, renameat(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s4d2));
+
+ ASSERT_EQ(0, apply_a_layer(_metadata, &layer));
+
+ ASSERT_EQ(-1,
+ renameat(bind_s1d3_fd, file1_name, bind_s1d3_fd, file2_name));
+ ASSERT_EQ(EACCES, errno);
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ ASSERT_EQ(0, records.access);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4c528154ea92..2ed1f76b7a8b 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -35,6 +35,7 @@ enum sandbox_type {
NO_SANDBOX,
/* This may be used to test rules that allow *and* deny accesses. */
TCP_SANDBOX,
+ UDP_SANDBOX,
};
static int set_service(struct service_fixture *const srv,
@@ -93,23 +94,53 @@ static bool prot_is_tcp(const struct protocol_variant *const prot)
(prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
}
+static bool prot_is_udp(const struct protocol_variant *const prot)
+{
+ return (prot->domain == AF_INET || prot->domain == AF_INET6) &&
+ prot->type == SOCK_DGRAM &&
+ (prot->protocol == IPPROTO_UDP || prot->protocol == IPPROTO_IP);
+}
+
static bool is_restricted(const struct protocol_variant *const prot,
const enum sandbox_type sandbox)
{
if (sandbox == TCP_SANDBOX)
return prot_is_tcp(prot);
+ else if (sandbox == UDP_SANDBOX)
+ return prot_is_udp(prot);
return false;
}
static int socket_variant(const struct service_fixture *const srv)
{
+ /* Arbitrary value just to not block other tests indefinitely. */
+ const struct timeval timeout = {
+ .tv_sec = 0,
+ .tv_usec = 100000,
+ };
+ int sockfd;
int ret;
- ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
- srv->protocol.protocol);
- if (ret < 0)
+ sockfd = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
+ srv->protocol.protocol);
+ if (sockfd < 0)
return -errno;
- return ret;
+
+ ret = setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+ sizeof(timeout));
+ if (ret != 0) {
+ ret = -errno;
+ close(sockfd);
+ return ret;
+ }
+ ret = setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+ sizeof(timeout));
+ if (ret != 0) {
+ ret = -errno;
+ close(sockfd);
+ return ret;
+ }
+ return sockfd;
}
#ifndef SIN6_LEN_RFC2133
@@ -258,9 +289,163 @@ static int connect_variant(const int sock_fd,
return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
}
+static int sendto_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen, void *buf,
+ size_t len, size_t flags)
+{
+ const struct sockaddr *dst = NULL;
+ ssize_t ret;
+
+ /*
+ * We never want our processes to be killed by SIGPIPE: we check return
+ * codes and errno, so that we have actual error messages.
+ */
+ flags |= MSG_NOSIGNAL;
+
+ if (srv != NULL) {
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ dst = (const struct sockaddr *)&srv->ipv4_addr;
+ break;
+
+ case AF_INET6:
+ dst = (const struct sockaddr *)&srv->ipv6_addr;
+ break;
+
+ case AF_UNIX:
+ dst = (const struct sockaddr *)&srv->unix_addr;
+ break;
+
+ default:
+ errno = EAFNOSUPPORT;
+ return -errno;
+ }
+ }
+
+ ret = sendto(sock_fd, buf, len, flags, dst, addrlen);
+ if (ret < 0)
+ return -errno;
+
+ /* errno is not set in cases of partial writes. */
+ if (ret != len)
+ return -EINTR;
+
+ return 0;
+}
+
+static int sendto_variant(const int sock_fd,
+ const struct service_fixture *const srv, void *buf,
+ size_t len, size_t flags)
+{
+ socklen_t addrlen = 0;
+
+ if (srv != NULL)
+ addrlen = get_addrlen(srv, false);
+
+ return sendto_variant_addrlen(sock_fd, srv, addrlen, buf, len, flags);
+}
+
+static int test_sendmsg(struct __test_metadata *const _metadata,
+ const struct protocol_variant *prot, int client_fd,
+ int server_fd, const struct service_fixture *srv,
+ bool bind_denied, bool send_denied)
+{
+ int ret;
+ socklen_t opt_len;
+ int sock_type;
+ int addr_family;
+ struct sockaddr_storage peer_addr = { 0 };
+ bool has_remote_port;
+ bool needs_autobind;
+ char read_buf[1] = { 0 };
+
+ /*
+ * Prepare the test by inspecting the socket type and whether it has a
+ * local/remote address set (all of which determine the expected
+ * outcomes).
+ */
+ opt_len = sizeof(sock_type);
+ ASSERT_EQ(0, getsockopt(client_fd, SOL_SOCKET, SO_TYPE, &sock_type,
+ &opt_len));
+ opt_len = sizeof(addr_family);
+ ASSERT_EQ(0, getsockopt(client_fd, SOL_SOCKET, SO_DOMAIN, &addr_family,
+ &opt_len));
+ opt_len = sizeof(peer_addr);
+ has_remote_port = (getpeername(client_fd, (struct sockaddr *)&peer_addr,
+ &opt_len) == 0);
+ needs_autobind = (addr_family == AF_INET || addr_family == AF_INET6) &&
+ get_binded_port(client_fd, prot) == 0;
+
+ /* First, check error code with truncated explicit address. */
+ if (srv != NULL) {
+ ret = sendto_variant_addrlen(
+ client_fd, srv, get_addrlen(srv, true) - 1, "A", 1, 0);
+ if (sock_type == SOCK_STREAM && !has_remote_port) {
+ EXPECT_EQ(-EPIPE, ret)
+ {
+ return -1;
+ }
+ } else if (bind_denied && needs_autobind) {
+ EXPECT_EQ(-EACCES, ret)
+ {
+ return -1;
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret)
+ {
+ return -1;
+ }
+ }
+ }
+
+ /* With or without explicit destination address (srv can be NULL). */
+ ret = sendto_variant(client_fd, srv, "B", 1, 0);
+ if (sock_type == SOCK_STREAM && !has_remote_port) {
+ EXPECT_EQ(-EPIPE, ret)
+ {
+ return -1;
+ }
+ } else if ((send_denied && srv != NULL) ||
+ (bind_denied && needs_autobind)) {
+ ASSERT_EQ(-EACCES, ret)
+ {
+ return -1;
+ }
+ } else if (srv == NULL && !has_remote_port) {
+ if (addr_family == AF_UNIX) {
+ ASSERT_EQ(-ENOTCONN, ret)
+ {
+ return -1;
+ }
+ } else if (sock_type == SOCK_STREAM) {
+ ASSERT_EQ(-EPIPE, ret)
+ {
+ return -1;
+ }
+ } else {
+ ASSERT_EQ(-EDESTADDRREQ, ret)
+ {
+ return -1;
+ }
+ }
+ } else {
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(1, recv(server_fd, read_buf, 1, 0));
+ ASSERT_EQ(read_buf[0], 'B')
+ {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
FIXTURE(protocol)
{
- struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
+ struct service_fixture srv0, srv1, srv2;
+ struct service_fixture unspec_any0, unspec_srv0, unspec_srv1;
};
FIXTURE_VARIANT(protocol)
@@ -271,10 +456,9 @@ FIXTURE_VARIANT(protocol)
FIXTURE_SETUP(protocol)
{
- const struct protocol_variant prot_unspec = {
- .domain = AF_UNSPEC,
- .type = SOCK_STREAM,
- };
+ struct protocol_variant prot_unspec = variant->prot;
+
+ prot_unspec.domain = AF_UNSPEC;
disable_caps(_metadata);
@@ -283,6 +467,7 @@ FIXTURE_SETUP(protocol)
ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+ ASSERT_EQ(0, set_service(&self->unspec_srv1, prot_unspec, 1));
ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0));
self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -510,6 +695,92 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv4_udp1) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv4_udp2) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv6_udp1) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv6_udp2) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv4_tcp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv6_tcp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
static void test_bind_and_connect(struct __test_metadata *const _metadata,
const struct service_fixture *const srv,
const bool deny_bind, const bool deny_connect)
@@ -602,7 +873,7 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata,
ret = connect_variant(connect_fd, srv);
if (deny_connect) {
EXPECT_EQ(-EACCES, ret);
- } else if (deny_bind) {
+ } else if (deny_bind && srv->protocol.type == SOCK_STREAM) {
/* No listening server. */
EXPECT_EQ(-ECONNREFUSED, ret);
} else {
@@ -641,18 +912,25 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata,
TEST_F(protocol, bind)
{
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 bind_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = bind_access | conn_access,
};
- const struct landlock_net_port_attr tcp_bind_connect_p0 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_p0 = {
+ .allowed_access = bind_access | conn_access,
.port = self->srv0.port,
};
- const struct landlock_net_port_attr tcp_connect_p1 = {
- .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr connect_p1 = {
+ .allowed_access = conn_access,
.port = self->srv1.port,
};
int ruleset_fd;
@@ -664,12 +942,26 @@ TEST_F(protocol, bind)
/* Allows connect and bind for the first port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_p0, 0));
+ &bind_connect_p0, 0));
/* Allows connect and denies bind for the second port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_connect_p1, 0));
+ &connect_p1, 0));
+
+ /*
+ * For UDP sockets, allows binding to ephemeral ports (required
+ * to connect or send a first datagram)
+ */
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_net_port_attr bind_ephemeral = {
+ .allowed_access = bind_access,
+ .port = 0,
+ };
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &bind_ephemeral, 0));
+ }
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -691,18 +983,25 @@ TEST_F(protocol, bind)
TEST_F(protocol, connect)
{
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 bind_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = bind_access | conn_access,
};
- const struct landlock_net_port_attr tcp_bind_connect_p0 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_p0 = {
+ .allowed_access = bind_access | conn_access,
.port = self->srv0.port,
};
- const struct landlock_net_port_attr tcp_bind_p1 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ const struct landlock_net_port_attr bind_p1 = {
+ .allowed_access = bind_access,
.port = self->srv1.port,
};
int ruleset_fd;
@@ -714,12 +1013,26 @@ TEST_F(protocol, connect)
/* Allows connect and bind for the first port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_p0, 0));
+ &bind_connect_p0, 0));
/* Allows bind and denies connect for the second port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_p1, 0));
+ &bind_p1, 0));
+
+ /*
+ * For UDP sockets, allows binding to ephemeral ports (required
+ * to connect or send a first datagram)
+ */
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_net_port_attr bind_ephemeral = {
+ .allowed_access = bind_access,
+ .port = 0,
+ };
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &bind_ephemeral, 0));
+ }
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -737,16 +1050,20 @@ TEST_F(protocol, connect)
TEST_F(protocol, bind_unspec)
{
+ const __u64 bind_access = (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .handled_access_net = bind_access,
};
- const struct landlock_net_port_attr tcp_bind = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ const struct landlock_net_port_attr rule_bind = {
+ .allowed_access = bind_access,
.port = self->srv0.port,
};
int bind_fd, ret;
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -754,7 +1071,7 @@ TEST_F(protocol, bind_unspec)
/* Allows bind. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind, 0));
+ &rule_bind, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
}
@@ -782,7 +1099,8 @@ TEST_F(protocol, bind_unspec)
}
EXPECT_EQ(0, close(bind_fd));
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -828,11 +1146,21 @@ TEST_F(protocol, bind_unspec)
TEST_F(protocol, connect_unspec)
{
- const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const __u64 connect_right =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+ const __u64 bind_right = (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const struct landlock_ruleset_attr ruleset_conn = {
+ .handled_access_net = connect_right,
};
- const struct landlock_net_port_attr tcp_connect = {
- .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_ruleset_attr ruleset_conn_bind = {
+ .handled_access_net = connect_right | bind_right,
+ };
+ const struct landlock_net_port_attr rule_connect = {
+ .allowed_access = connect_right,
.port = self->srv0.port,
};
int bind_fd, client_fd, status;
@@ -865,15 +1193,16 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, ret);
}
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
- &ruleset_attr, sizeof(ruleset_attr), 0);
+ &ruleset_conn, sizeof(ruleset_conn), 0);
ASSERT_LE(0, ruleset_fd);
/* Allows connect. */
ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
LANDLOCK_RULE_NET_PORT,
- &tcp_connect, 0));
+ &rule_connect, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
}
@@ -896,12 +1225,14 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, ret);
}
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
- &ruleset_attr, sizeof(ruleset_attr), 0);
+ &ruleset_conn_bind, sizeof(ruleset_conn_bind),
+ 0);
ASSERT_LE(0, ruleset_fd);
- /* Denies connect. */
+ /* Denies connect and bind. */
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
}
@@ -950,6 +1281,441 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, close(bind_fd));
}
+TEST_F(protocol, sendmsg_stream)
+{
+ int srv0_fd, tmp_fd, client_fd, res;
+ char read_buf[1] = { 0 };
+
+ /*
+ * Simple test for stream sockets: just deny all connect()/
+ * send(explicit addr)/bind(), and make sure we don't interfere with any
+ * operation.
+ */
+ if (variant->prot.type != SOCK_STREAM)
+ return;
+
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ ASSERT_LE(0, srv0_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, bind_variant(srv0_fd, &self->srv0));
+ ASSERT_EQ(0, listen(srv0_fd, backlog));
+
+ /* Send on a non-connected socket. */
+ res = sendto_variant(client_fd, NULL, "A", 1, 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-ENOTCONN, res);
+ } else {
+ EXPECT_EQ(-EPIPE, res);
+ }
+
+ /* Send to a truncated (invalid) address on a non-connected socket. */
+ res = sendto_variant_addrlen(client_fd, &self->srv0,
+ get_addrlen(&self->srv0, true) - 1, "B", 1,
+ 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-EOPNOTSUPP, res);
+ } else {
+ EXPECT_EQ(-EPIPE, res);
+ }
+
+ /* Connect. */
+ ASSERT_EQ(0, connect_variant(client_fd, &self->srv0));
+ tmp_fd = accept(srv0_fd, NULL, 0);
+ ASSERT_LE(0, tmp_fd);
+ EXPECT_EQ(0, close(srv0_fd));
+ srv0_fd = tmp_fd;
+
+ /* Send without an explicit address. */
+ EXPECT_EQ(0, sendto_variant(client_fd, NULL, "C", 1, 0));
+ EXPECT_EQ(1, recv(srv0_fd, read_buf, 1, 0))
+ {
+ TH_LOG("recv() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'C');
+
+ /* Send to a truncated (invalid) address. */
+ res = sendto_variant_addrlen(client_fd, &self->srv0,
+ get_addrlen(&self->srv0, true) - 1, "D", 1,
+ 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-EISCONN, res);
+ } else {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, recv(srv0_fd, read_buf, 1, 0))
+ {
+ TH_LOG("recv() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'D');
+ }
+
+ /* Send to a valid but different address. */
+ res = sendto_variant(client_fd, &self->srv1, "E", 1, 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-EISCONN, res);
+ } else {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, recv(srv0_fd, read_buf, 1, 0))
+ {
+ TH_LOG("recv() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'E');
+ }
+
+ EXPECT_EQ(0, close(client_fd));
+}
+
+TEST_F(protocol, sendmsg_dgram)
+{
+ const bool restricted = is_restricted(&variant->prot, variant->sandbox);
+ int srv0_fd, srv1_fd, client_fd, child, status, res;
+
+ if (variant->prot.type != SOCK_DGRAM)
+ return;
+
+ /* Prepare server on port #0 to be allowed. */
+ ASSERT_LE(0, srv0_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, bind_variant(srv0_fd, &self->srv0));
+
+ /* And another server on port #1 to be denied. */
+ ASSERT_LE(0, srv1_fd = socket_variant(&self->srv1));
+ ASSERT_EQ(0, bind_variant(srv1_fd, &self->srv1));
+
+ /*
+ * Check that sockets connected before restrictions are not impacted in
+ * any way.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, connect_variant(client_fd, &self->srv0));
+ if (variant->sandbox == UDP_SANDBOX) {
+ /* Deny all connect()/send(explicit addr)/bind(). */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ EXPECT_EQ(0,
+ test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, NULL, restricted, restricted));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, restricted,
+ restricted));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv1_fd, &self->srv1, restricted,
+ restricted));
+ EXPECT_EQ(0, close(client_fd));
+ _exit(_metadata->exit_code);
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Restrict connect/send, but not bind(). Then try sending with no
+ * destination (and no remote peer set), an allowed destination, then a
+ * denied destination.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const struct landlock_net_port_attr send_p0 = {
+ .allowed_access =
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = self->srv0.port,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &send_p0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ -1, NULL, false, false));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, false, false));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv1_fd, &self->srv1, false,
+ restricted));
+ EXPECT_EQ(0, close(client_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Rest of this test is just for autobind enforcement, which only exists
+ * in IP sockets.
+ */
+ if (variant->prot.domain != AF_INET && variant->prot.domain != AF_INET6)
+ return;
+
+ /* Restrict bind() to explicit calls with an arbitrary (non-0) port. */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const uint16_t allowed_src_port = 42424;
+ struct service_fixture allowed_src;
+
+ allowed_src = self->srv0;
+ set_port(&allowed_src, allowed_src_port);
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_UDP,
+ .port = allowed_src_port,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+
+ /* Check that implicit bind(0) in sendmsg() is denied. */
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, restricted,
+ false));
+
+ /* Same thing for autobind in connect(). */
+ res = connect_variant(client_fd, &self->srv0);
+ if (restricted) {
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ EXPECT_EQ(0, res);
+ }
+ EXPECT_EQ(0, close(client_fd));
+
+ /* Make sendmsg() work by explicitly binding to the only allowed port. */
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, bind_variant(client_fd, &allowed_src));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, restricted,
+ false));
+ EXPECT_EQ(0, close(client_fd));
+
+ /* Make connect() work by explicitly binding to the only allowed port. */
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, bind_variant(client_fd, &allowed_src));
+ EXPECT_EQ(0, connect_variant(client_fd, &self->srv0));
+ EXPECT_EQ(0, close(client_fd));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Check that %LANDLOCK_ACCESS_NET_BIND_UDP on port 0 allows implicit
+ * autobinds.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_UDP,
+ .port = 0,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, false, false));
+ EXPECT_EQ(0, close(client_fd));
+ _exit(_metadata->exit_code);
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+}
+
+TEST_F(protocol, sendmsg_unspec)
+{
+ const bool restricted = is_restricted(&variant->prot, variant->sandbox);
+ int client_fd, srv0_fd, srv1_fd, res;
+ char read_buf[1] = { 0 };
+
+ /*
+ * We already test for the absence of influence on sendmsg for other
+ * socket types and other address families, there's no point in adapting
+ * this test for stream sockets too.
+ */
+ if (variant->prot.type != SOCK_DGRAM)
+ return;
+
+ /* Prepare client of the right family. */
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+
+ /* Prepare server on port #0 to be allowed. */
+ ASSERT_LE(0, srv0_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, bind_variant(srv0_fd, &self->srv0));
+
+ /* And another server on port #1 to be denied. */
+ ASSERT_LE(0, srv1_fd = socket_variant(&self->srv1));
+ ASSERT_EQ(0, bind_variant(srv1_fd, &self->srv1));
+
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = self->srv0.port,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Explicit AF_UNSPEC address but truncated. */
+ EXPECT_EQ(-EINVAL, sendto_variant_addrlen(
+ client_fd, &self->unspec_srv0,
+ get_addrlen(&self->unspec_srv0, true) - 1,
+ "A", 1, 0));
+
+ /*
+ * Explicit AF_UNSPEC address, should be treated as AF_INET by IPv4
+ * sockets (and thus map to srv0, allowed), but be denied by IPv6
+ * sockets.
+ */
+ res = sendto_variant(client_fd, &self->unspec_srv0, "B", 1, 0);
+ if (variant->prot.domain == AF_INET6) {
+ if (restricted) {
+ /* Always denied on IPv6 socket. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ /* IPv6 sockets treat AF_UNSPEC as a NULL address. */
+ EXPECT_EQ(-EDESTADDRREQ, res);
+ }
+ } else if (variant->prot.domain == AF_INET) {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, read(srv0_fd, read_buf, 1))
+ {
+ TH_LOG("read() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'B');
+ } else {
+ /* Unix sockets don't accept AF_UNSPEC. */
+ EXPECT_EQ(-EINVAL, res);
+ }
+
+ /*
+ * Explicit AF_UNSPEC address, should be treated as AF_INET on IPv4
+ * sockets (and thus map to srv1, denied), and be denied on IPv6 sockets
+ * as always.
+ */
+ res = sendto_variant(client_fd, &self->unspec_srv1, "C", 1, 0);
+ if (variant->prot.domain == AF_INET6) {
+ if (restricted) {
+ /* Always denied on IPv6 socket. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ /* IPv6 sockets treat AF_UNSPEC as a NULL address. */
+ EXPECT_EQ(-EDESTADDRREQ, res);
+ }
+ } else if (variant->prot.domain == AF_INET) {
+ if (restricted) {
+ /* Sending to srv1 is not allowed, only srv0. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, read(srv1_fd, read_buf, 1))
+ {
+ TH_LOG("read() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'C');
+ }
+ } else {
+ /* Unix sockets don't accept AF_UNSPEC. */
+ EXPECT_EQ(-EINVAL, res);
+ }
+
+ ASSERT_EQ(0, connect_variant(client_fd, &self->srv0));
+
+ /* Minimal explicit AF_UNSPEC address (just the sa_family_t field) */
+ res = sendto_variant_addrlen(client_fd, &self->unspec_srv0,
+ get_addrlen(&self->unspec_srv0, true), "D",
+ 1, 0);
+ if (variant->prot.domain == AF_INET6) {
+ if (restricted) {
+ /* AF_UNSPEC is always denied in IPv6. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ /*
+ * IPv6 sockets treat AF_UNSPEC as a NULL address,
+ * falling back to the connected address.
+ */
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, read(srv0_fd, read_buf, 1));
+ EXPECT_EQ(read_buf[0], 'D');
+ }
+ } else {
+ /*
+ * IPv4 socket will expect a struct sockaddr_in, our address is
+ * considered truncated. And Unix sockets don't accept
+ * AF_UNSPEC at all.
+ */
+ EXPECT_EQ(-EINVAL, res);
+ }
+}
+
FIXTURE(ipv4)
{
struct service_fixture srv0, srv1;
@@ -976,6 +1742,13 @@ FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) {
};
/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, udp_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) {
/* clang-format on */
.sandbox = NO_SANDBOX,
@@ -989,6 +1762,13 @@ FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) {
.type = SOCK_DGRAM,
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, udp_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
FIXTURE_SETUP(ipv4)
{
const struct protocol_variant prot = {
@@ -1012,14 +1792,19 @@ TEST_F(ipv4, from_unix_to_inet)
{
int unix_stream_fd, unix_dgram_fd;
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 access_rights =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = access_rights,
};
const struct landlock_net_port_attr tcp_bind_connect_p0 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .allowed_access = access_rights,
.port = self->srv0.port,
};
int ruleset_fd;
@@ -1326,11 +2111,13 @@ FIXTURE_TEARDOWN(mini)
/* clang-format off */
-#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP
#define ACCESS_ALL ( \
LANDLOCK_ACCESS_NET_BIND_TCP | \
- LANDLOCK_ACCESS_NET_CONNECT_TCP)
+ LANDLOCK_ACCESS_NET_CONNECT_TCP | \
+ LANDLOCK_ACCESS_NET_BIND_UDP | \
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP)
/* clang-format on */
@@ -1678,6 +2465,7 @@ TEST_F(ipv4_tcp, with_fs)
FIXTURE(port_specific)
{
struct service_fixture srv0;
+ struct service_fixture cli1;
};
FIXTURE_VARIANT(port_specific)
@@ -1697,7 +2485,7 @@ FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
+FIXTURE_VARIANT_ADD(port_specific, tcp_sandbox_with_ipv4) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
@@ -1707,6 +2495,16 @@ FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
};
/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, udp_sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
/* clang-format on */
.sandbox = NO_SANDBOX,
@@ -1717,7 +2515,7 @@ FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
+FIXTURE_VARIANT_ADD(port_specific, tcp_sandbox_with_ipv6) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
@@ -1726,11 +2524,22 @@ FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, udp_sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
FIXTURE_SETUP(port_specific)
{
disable_caps(_metadata);
ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->cli1, variant->prot, 1));
setup_loopback(_metadata);
};
@@ -1745,14 +2554,19 @@ TEST_F(port_specific, bind_connect_zero)
uint16_t port;
/* Adds a rule layer with bind and connect actions. */
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 access_rights =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP
+ .handled_access_net = access_rights,
};
- const struct landlock_net_port_attr tcp_bind_connect_zero = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_zero = {
+ .allowed_access = access_rights,
.port = 0,
};
int ruleset_fd;
@@ -1764,7 +2578,7 @@ TEST_F(port_specific, bind_connect_zero)
/* Checks zero port value on bind and connect actions. */
EXPECT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_zero, 0));
+ &bind_connect_zero, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -1785,11 +2599,16 @@ TEST_F(port_specific, bind_connect_zero)
ret = bind_variant(bind_fd, &self->srv0);
EXPECT_EQ(0, ret);
- EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
/* Connects on port 0. */
ret = connect_variant(connect_fd, &self->srv0);
- EXPECT_EQ(-ECONNREFUSED, ret);
+ if (variant->prot.type == SOCK_STREAM) {
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
/* Sets binded port for both protocol families. */
port = get_binded_port(bind_fd, &variant->prot);
@@ -1813,23 +2632,35 @@ TEST_F(port_specific, bind_connect_1023)
int bind_fd, connect_fd, ret;
/* Adds a rule layer with bind and connect actions. */
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 bind_right = (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 access_rights =
+ (variant->sandbox == TCP_SANDBOX ?
+ (LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP) :
+ (LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP));
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP
+ .handled_access_net = access_rights,
};
/* A rule with port value less than 1024. */
- const struct landlock_net_port_attr tcp_bind_connect_low_range = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_low_range = {
+ .allowed_access = access_rights,
.port = 1023,
};
/* A rule with 1024 port. */
- const struct landlock_net_port_attr tcp_bind_connect = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect = {
+ .allowed_access = access_rights,
.port = 1024,
};
+ /* A rule with cli1's port, to use as source port. */
+ const struct landlock_net_port_attr srcport = {
+ .allowed_access = bind_right,
+ .port = self->cli1.port,
+ };
int ruleset_fd;
ruleset_fd = landlock_create_ruleset(&ruleset_attr,
@@ -1838,10 +2669,15 @@ TEST_F(port_specific, bind_connect_1023)
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_low_range, 0));
+ &bind_connect_low_range, 0));
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect, 0));
+ &bind_connect, 0));
+ if (variant->sandbox == UDP_SANDBOX) {
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &srcport, 0));
+ }
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -1850,9 +2686,6 @@ TEST_F(port_specific, bind_connect_1023)
bind_fd = socket_variant(&self->srv0);
ASSERT_LE(0, bind_fd);
- connect_fd = socket_variant(&self->srv0);
- ASSERT_LE(0, connect_fd);
-
/* Sets address port to 1023 for both protocol families. */
set_port(&self->srv0, 1023);
/* Binds on port 1023. */
@@ -1865,8 +2698,19 @@ TEST_F(port_specific, bind_connect_1023)
ret = bind_variant(bind_fd, &self->srv0);
clear_cap(_metadata, CAP_NET_BIND_SERVICE);
EXPECT_EQ(0, ret);
- EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+ if (variant->prot.type == SOCK_DGRAM) {
+ /*
+ * We are about to connect(), but bind() is restricted, so for
+ * UDP sockets we need to use cli1's port as source port (the
+ * only one we are allowed to use).
+ */
+ EXPECT_EQ(0, bind_variant(connect_fd, &self->cli1));
+ }
/* Connects on the binded port 1023. */
ret = connect_variant(connect_fd, &self->srv0);
EXPECT_EQ(0, ret);
@@ -1885,7 +2729,10 @@ TEST_F(port_specific, bind_connect_1023)
/* Binds on port 1024. */
ret = bind_variant(bind_fd, &self->srv0);
EXPECT_EQ(0, ret);
- EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_DGRAM)
+ EXPECT_EQ(0, bind_variant(connect_fd, &self->cli1));
/* Connects on the binded port 1024. */
ret = connect_variant(connect_fd, &self->srv0);
@@ -1895,23 +2742,41 @@ TEST_F(port_specific, bind_connect_1023)
EXPECT_EQ(0, close(bind_fd));
}
-static int matches_log_tcp(const int audit_fd, const char *const blockers,
- const char *const dir_addr, const char *const addr,
- const char *const dir_port)
+/**
+ * matches_auditlog - Check audit log for a network access denial
+ *
+ * @audit_fd: Audit file descriptor.
+ * @blockers: A regex-escaped blocker string, e.g., "net\.bind_tcp".
+ * @dir_addr: Either "saddr" or "daddr", ignored if addr is NULL.
+ * @addr: A regex-escaped IP address string, or NULL.
+ * @dir_port: Either "src" or "dest", ignored if addr is NULL.
+ * @port: A port number, ignored if addr is NULL.
+ */
+static int matches_auditlog(const int audit_fd, const char *const blockers,
+ const char *const dir_addr, const char *const addr,
+ const char *const dir_port, const __u16 port)
{
- static const char log_template[] = REGEX_LANDLOCK_PREFIX
- " blockers=%s %s=%s %s=1024$";
+ static const char log_with_addrport_tmpl[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s %s=%s %s=%u$";
+ static const char log_without_addrport_tmpl[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s";
/*
* Max strlen(blockers): 16
* Max strlen(dir_addr): 5
* Max strlen(addr): 12
* Max strlen(dir_port): 4
+ * Max strlen(%u port): 5
*/
- char log_match[sizeof(log_template) + 37];
+ char log_match[sizeof(log_with_addrport_tmpl) + 42];
int log_match_len;
- log_match_len = snprintf(log_match, sizeof(log_match), log_template,
- blockers, dir_addr, addr, dir_port);
+ if (addr == NULL)
+ log_match_len = snprintf(log_match, sizeof(log_match),
+ log_without_addrport_tmpl, blockers);
+ else
+ log_match_len = snprintf(log_match, sizeof(log_match),
+ log_with_addrport_tmpl, blockers,
+ dir_addr, addr, dir_port, port);
if (log_match_len > sizeof(log_match))
return -E2BIG;
@@ -1922,6 +2787,10 @@ static int matches_log_tcp(const int audit_fd, const char *const blockers,
FIXTURE(audit)
{
struct service_fixture srv0;
+ struct service_fixture srv1;
+ /* srv2 has a rule with no access but quiet bit set. */
+ struct service_fixture srv2;
+ struct service_fixture unspec_srv0;
struct audit_filter audit_filter;
int audit_fd;
};
@@ -1933,7 +2802,7 @@ FIXTURE_VARIANT(audit)
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(audit, ipv4) {
+FIXTURE_VARIANT_ADD(audit, ipv4_tcp) {
/* clang-format on */
.addr = "127\\.0\\.0\\.1",
.prot = {
@@ -1943,7 +2812,17 @@ FIXTURE_VARIANT_ADD(audit, ipv4) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(audit, ipv6) {
+FIXTURE_VARIANT_ADD(audit, ipv4_udp) {
+ /* clang-format on */
+ .addr = "127\\.0\\.0\\.1",
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6_tcp) {
/* clang-format on */
.addr = "::1",
.prot = {
@@ -1952,9 +2831,27 @@ FIXTURE_VARIANT_ADD(audit, ipv6) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6_udp) {
+ /* clang-format on */
+ .addr = "::1",
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
FIXTURE_SETUP(audit)
{
+ struct protocol_variant prot_unspec = variant->prot;
+
+ prot_unspec.domain = AF_UNSPEC;
+
ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+ ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
+ ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+
setup_loopback(_metadata);
set_cap(_metadata, CAP_AUDIT_CONTROL);
@@ -1972,9 +2869,22 @@ FIXTURE_TEARDOWN(audit)
TEST_F(audit, bind)
{
+ const char *audit_evt = (variant->prot.type == SOCK_STREAM ?
+ "net\\.bind_tcp" :
+ "net\\.bind_udp");
+ const __u64 access_rights =
+ (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = access_rights,
+ .quiet_access_net = access_rights,
+ };
+ const struct landlock_net_port_attr quiet_rule = {
+ .allowed_access = 0,
+ .port = self->srv2.port,
};
struct audit_records records;
int ruleset_fd, sock_fd;
@@ -1982,27 +2892,58 @@ TEST_F(audit, bind)
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &quiet_rule, LANDLOCK_ADD_RULE_QUIET));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
sock_fd = socket_variant(&self->srv0);
ASSERT_LE(0, sock_fd);
EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv0));
- EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.bind_tcp", "saddr",
- variant->addr, "src"));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, audit_evt, "saddr",
+ variant->addr, "src", self->srv0.port));
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
EXPECT_EQ(1, records.domain);
EXPECT_EQ(0, close(sock_fd));
+
+ /* Bind to srv2 (with quiet rule): no new audit logs. */
+ sock_fd = socket_variant(&self->srv2);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv2));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
}
TEST_F(audit, connect)
{
+ const char *audit_evt = (variant->prot.type == SOCK_STREAM ?
+ "net\\.connect_tcp" :
+ "net\\.connect_send_udp");
+ const __u64 bind_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+ const __u64 access_rights = bind_right | conn_right;
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = access_rights,
+ .quiet_access_net = access_rights,
+ };
+ const struct landlock_net_port_attr rule_connect_p1 = {
+ .allowed_access = conn_right,
+ .port = self->srv1.port,
+ };
+ const struct landlock_net_port_attr quiet_rule = {
+ .allowed_access = 0,
+ .port = self->srv2.port,
};
struct audit_records records;
int ruleset_fd, sock_fd;
@@ -2010,14 +2951,179 @@ TEST_F(audit, connect)
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule_connect_p1, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &quiet_rule, LANDLOCK_ADD_RULE_QUIET));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
sock_fd = socket_variant(&self->srv0);
ASSERT_LE(0, sock_fd);
EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv0));
- EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.connect_tcp",
- "daddr", variant->addr, "dest"));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, audit_evt, "daddr",
+ variant->addr, "dest", self->srv0.port));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ if (variant->prot.type == SOCK_DGRAM) {
+ /* Check that autobind generates a denied bind event. */
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv1));
+
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.bind_udp",
+ NULL, NULL, NULL, 0));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+ }
+
+ EXPECT_EQ(0, close(sock_fd));
+
+ /* Connect to srv2 (with quiet rule): no new audit logs. */
+ sock_fd = socket_variant(&self->srv2);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv2));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
+/* Quieting bind access has no effect on connect. */
+TEST_F(audit, connect_quiet_bind)
+{
+ const char *audit_evt = (variant->prot.type == SOCK_STREAM ?
+ "net\\.connect_tcp" :
+ "net\\.connect_send_udp");
+ const int bind_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const int conn_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+ const int access_rights = bind_right | conn_right;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = access_rights,
+ .quiet_access_net = bind_right,
+ };
+ const struct landlock_ruleset_attr ruleset_attr_2 = {
+ .handled_access_net = access_rights,
+ .quiet_access_net = conn_right,
+ };
+ const struct landlock_net_port_attr quiet_rule = {
+ .allowed_access = 0,
+ .port = self->srv2.port,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &quiet_rule, LANDLOCK_ADD_RULE_QUIET));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv2);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv2));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, audit_evt, "daddr",
+ variant->addr, "dest", self->srv2.port));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ EXPECT_EQ(0, close(sock_fd));
+
+ /* New layer that also denies connect but has the correct quiet bit. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr_2,
+ sizeof(ruleset_attr_2), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &quiet_rule, LANDLOCK_ADD_RULE_QUIET));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv2);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv2));
+
+ /* Quieted - no logs expected. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
+static int matches_log_connect_bound(int audit_fd, const char *const blockers,
+ const char *const addr, __u16 lport,
+ __u16 dport)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s laddr=%s lport=%u daddr=%s dest=%u$";
+ /* Slack for the blockers, two addresses and two port numbers. */
+ char log_match[sizeof(log_template) + 60];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ blockers, addr, lport, addr, dport);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+/*
+ * After a bind() to an allowed port, a denied connect must report laddr/lport
+ * from the bound socket (made available through audit_net.sk) in addition to
+ * the connect sockaddr's daddr/dest.
+ */
+TEST_F(audit, connect_bound)
+{
+ const __u64 bind_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+ const char *const audit_evt = (variant->prot.type == SOCK_STREAM ?
+ "net\\.connect_tcp" :
+ "net\\.connect_send_udp");
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = bind_right | conn_right,
+ };
+ const struct landlock_net_port_attr rule_bind = {
+ .allowed_access = bind_right,
+ .port = self->srv0.port,
+ };
+ struct service_fixture srv_remote;
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ /* Uses a second port as the denied connect target. */
+ ASSERT_EQ(0, set_service(&srv_remote, variant->prot, 1));
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(0, bind_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &srv_remote));
+ EXPECT_EQ(0, matches_log_connect_bound(self->audit_fd, audit_evt,
+ variant->addr, self->srv0.port,
+ srv_remote.port));
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
@@ -2026,4 +3132,60 @@ TEST_F(audit, connect)
EXPECT_EQ(0, close(sock_fd));
}
+TEST_F(audit, sendmsg)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP |
+ LANDLOCK_ACCESS_NET_BIND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = self->srv1.port,
+ };
+ struct audit_records records;
+ int ruleset_fd;
+ int sock_fd;
+
+ /* Sendmsg on stream sockets is never denied. */
+ if (variant->prot.type != SOCK_DGRAM)
+ return;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, sendto_variant(sock_fd, &self->srv0, "A", 1, 0));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.connect_send_udp",
+ "daddr", variant->addr, "dest",
+ self->srv0.port));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ /* Check that autobind generates a denied bind event. */
+ EXPECT_EQ(-EACCES, sendto_variant(sock_fd, &self->srv1, "A", 1, 0));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.bind_udp", NULL,
+ NULL, NULL, 0));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ EXPECT_EQ(-EACCES,
+ sendto_variant(sock_fd, &self->unspec_srv0, "B", 1, 0));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.connect_send_udp",
+ "daddr", NULL, "dest", 0));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 1b6c8b53bf33..4f64c90583cd 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -342,6 +342,7 @@ TEST_F(audit, trace)
/* Makes sure there is no superfluous logged records. */
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
yama_ptrace_scope = get_yama_ptrace_scope();
ASSERT_LE(0, yama_ptrace_scope);
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
index c47491d2d1c1..40fc82fbf01d 100644
--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -293,6 +293,45 @@ FIXTURE_TEARDOWN_PARENT(scoped_audit)
EXPECT_EQ(0, audit_cleanup(-1, NULL));
}
+FIXTURE_VARIANT(scoped_audit)
+{
+ const __u64 scoped;
+ const __u64 quiet_scoped;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, no_quiet)
+{
+ /* clang-format on */
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+ .quiet_scoped = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, quiet_abstract_socket)
+{
+ /* clang-format on */
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+ .quiet_scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, quiet_abstract_socket_2)
+{
+ /* clang-format on */
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL,
+ .quiet_scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
+ LANDLOCK_SCOPE_SIGNAL,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, quiet_unrelated)
+{
+ /* clang-format on */
+ .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL,
+ .quiet_scoped = LANDLOCK_SCOPE_SIGNAL,
+};
+
/* python -c 'print(b"\0selftests-landlock-abstract-unix-".hex().upper())' */
#define ABSTRACT_SOCKET_PATH_PREFIX \
"0073656C6674657374732D6C616E646C6F636B2D61627374726163742D756E69782D"
@@ -308,10 +347,18 @@ TEST_F(scoped_audit, connect_to_child)
char buf;
int dgram_client;
struct audit_records records;
+ int ruleset_fd;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = variant->scoped,
+ .quiet_scoped = variant->quiet_scoped,
+ };
+ bool should_audit =
+ !(variant->quiet_scoped & LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
/* Makes sure there is no superfluous logged records. */
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
@@ -344,7 +391,14 @@ TEST_F(scoped_audit, connect_to_child)
EXPECT_EQ(0, close(pipe_child[1]));
EXPECT_EQ(0, close(pipe_parent[0]));
- create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
/* Signals that the parent is in a domain, if any. */
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
@@ -359,14 +413,20 @@ TEST_F(scoped_audit, connect_to_child)
EXPECT_EQ(-1, err_dgram);
EXPECT_EQ(EPERM, errno);
- EXPECT_EQ(
- 0,
- audit_match_record(
- self->audit_fd, AUDIT_LANDLOCK_ACCESS,
- REGEX_LANDLOCK_PREFIX
- " blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
- "[0-9A-F]\\+$",
- NULL));
+ if (should_audit) {
+ EXPECT_EQ(
+ 0,
+ audit_match_record(
+ self->audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
+ "[0-9A-F]\\+$",
+ NULL));
+ }
+
+ /* No other logs */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
EXPECT_EQ(0, close(dgram_client));
diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c
index d8bf33417619..f24f2c28f62e 100644
--- a/tools/testing/selftests/landlock/scoped_signal_test.c
+++ b/tools/testing/selftests/landlock/scoped_signal_test.c
@@ -559,4 +559,186 @@ TEST_F(fown, sigurg_socket)
_metadata->exit_code = KSFT_FAIL;
}
+/*
+ * Checks that LANDLOCK_SCOPE_SIGNAL is enforced on the asynchronous SIGIO
+ * delivery path (fcntl(F_SETOWN)) when the file owner is a process group.
+ *
+ * A sandboxed process sitting at the head of its process group's PID hlist (the
+ * default position right after fork()) used to escape the fcntl(F_SETOWN,
+ * -pgrp) domain recording: pid_task(pgrp, PIDTYPE_PGID) resolved to the process
+ * itself, so the same-thread-group exemption skipped recording its Landlock
+ * domain. At SIGIO time that domain was then unset and the signal fanned out
+ * to every group member, including non-sandboxed processes outside the domain.
+ */
+TEST(sigio_to_pgid_members)
+{
+ int trigger[2], sync_child[2];
+ char buf;
+ pid_t child;
+ int status, i;
+
+ drop_caps(_metadata);
+
+ /*
+ * Isolates the test in its own process group so the SIGIO fan-out stays
+ * bounded to this parent and the child forked below.
+ */
+ ASSERT_EQ(0, setpgid(0, 0));
+
+ /* The non-sandboxed parent is the protected (out-of-domain) target. */
+ ASSERT_EQ(0, setup_signal_handler(SIGURG));
+ signal_received = 0;
+
+ ASSERT_EQ(0, pipe2(trigger, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(sync_child, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /*
+ * The child inherits the parent's new process group and, just
+ * attached with hlist_add_head_rcu(), is now the head of the
+ * pgid hlist: this is the case that used to skip the recording.
+ */
+ EXPECT_EQ(0, close(sync_child[0]));
+
+ /* In-domain positive control: the child must be signaled. */
+ ASSERT_EQ(0, setup_signal_handler(SIGURG));
+ signal_received = 0;
+
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ /* Owns the SIGIO source for the whole process group. */
+ ASSERT_EQ(0, fcntl(trigger[0], F_SETSIG, SIGURG));
+ ASSERT_EQ(0, fcntl(trigger[0], F_SETOWN, -getpgrp()));
+ ASSERT_EQ(0, fcntl(trigger[0], F_SETFL, O_ASYNC));
+
+ /* Fans SIGURG out to every member of the process group. */
+ ASSERT_EQ(1, write(trigger[1], ".", 1));
+
+ /*
+ * The sandboxed child is in its own domain and must always be
+ * signaled: this proves the SIGIO actually fired.
+ */
+ for (i = 0; i < 1000 && !signal_received; i++)
+ usleep(1000);
+ EXPECT_EQ(1, signal_received);
+
+ ASSERT_EQ(1, write(sync_child[1], ".", 1));
+ EXPECT_EQ(0, close(sync_child[1]));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(sync_child[1]));
+ EXPECT_EQ(0, close(trigger[0]));
+ EXPECT_EQ(0, close(trigger[1]));
+
+ /* Waits for the child to generate the SIGIO. */
+ ASSERT_EQ(1, read(sync_child[0], &buf, 1));
+ EXPECT_EQ(0, close(sync_child[0]));
+
+ /* Lets a delivered-but-pending signal run our handler, if any. */
+ for (i = 0; i < 100 && !signal_received; i++)
+ usleep(1000);
+
+ /*
+ * SCOPE_SIGNAL must block the fan-out to this non-sandboxed parent,
+ * which is outside the child's Landlock domain. Before the fix the
+ * parent was signaled here.
+ */
+ EXPECT_EQ(0, signal_received);
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+static void *thread_setown_scoped(void *arg)
+{
+ const int fd = *(int *)arg;
+ int ruleset_fd;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+
+ /* Sandboxes only this non-leader thread (no thread syncing). */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ if (ruleset_fd < 0)
+ return (void *)THREAD_ERROR;
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+ landlock_restrict_self(ruleset_fd, 0)) {
+ close(ruleset_fd);
+ return (void *)THREAD_ERROR;
+ }
+ close(ruleset_fd);
+
+ /* Makes this process group own the SIGIO source. */
+ if (fcntl(fd, F_SETSIG, SIGURG) || fcntl(fd, F_SETOWN, -getpgrp()) ||
+ fcntl(fd, F_SETFL, O_ASYNC))
+ return (void *)THREAD_ERROR;
+
+ return (void *)THREAD_SUCCESS;
+}
+
+/*
+ * Checks that the SIGIO fan-out is still delivered to the file owner's own
+ * process when fcntl(F_SETOWN, -pgrp) was issued from a sandboxed non-leader
+ * thread.
+ *
+ * The Landlock domain is recorded for a process-group owner (so out-of-domain
+ * members stay blocked, see sigio_to_pgid_members), but the kernel signals a
+ * process group through its members' thread-group leaders. Here the leader is
+ * not sandboxed and thus has a different domain than the registering thread, so
+ * the registration-time check cannot tell that it belongs to the owner's own
+ * process. hook_file_send_sigiotask() must recognize it through the recorded
+ * thread group and allow the delivery, matching the same-process guarantee of
+ * commit 18eb75f3af40. Without that exemption the leader is wrongly denied and
+ * never signaled.
+ */
+TEST(sigio_to_pgid_self)
+{
+ int trigger[2];
+ pthread_t thread;
+ enum thread_return ret = THREAD_INVALID;
+ int i;
+
+ drop_caps(_metadata);
+
+ /* Bounds the SIGIO fan-out to this process. */
+ ASSERT_EQ(0, setpgid(0, 0));
+
+ /* The non-sandboxed thread-group leader is the SIGIO target. */
+ ASSERT_EQ(0, setup_signal_handler(SIGURG));
+ signal_received = 0;
+
+ ASSERT_EQ(0, pipe2(trigger, O_CLOEXEC));
+
+ /*
+ * Registers the process-group fowner from a sibling thread that
+ * sandboxes only itself, so its domain differs from the leader's.
+ */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_setown_scoped,
+ &trigger[0]));
+ ASSERT_EQ(0, pthread_join(thread, (void **)&ret));
+ ASSERT_EQ(THREAD_SUCCESS, ret);
+
+ /* Fans SIGURG out to the process group. */
+ ASSERT_EQ(1, write(trigger[1], ".", 1));
+
+ for (i = 0; i < 1000 && !signal_received; i++)
+ usleep(1000);
+
+ /*
+ * Same-process delivery must always be allowed, even though the owner
+ * was registered from a sandboxed sibling thread.
+ */
+ EXPECT_EQ(1, signal_received);
+
+ EXPECT_EQ(0, close(trigger[0]));
+ EXPECT_EQ(0, close(trigger[1]));
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 8ec0cb64ad94..30dc677b2f45 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -4,6 +4,8 @@
# Shell functions for the rest of the scripts.
+export LC_ALL=C
+
MAX_RETRIES=600
RETRY_INTERVAL=".1" # seconds
SYSFS_KERNEL_DIR="/sys/kernel"
@@ -339,6 +341,16 @@ function check_result {
fi
}
+# does_sysfs_exist(modname, attr) - check sysfs attribute existence
+# modname - livepatch module creating the sysfs interface
+# attr - attribute name to be checked
+function does_sysfs_exist() {
+ local mod="$1"; shift
+ local attr="$1"; shift
+
+ [[ -f "$SYSFS_KLP_DIR/$mod/$attr" ]]
+}
+
# check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs
# path permissions
# modname - livepatch module creating the sysfs interface
diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh
index b67dfad03d97..7ced4082cff3 100755
--- a/tools/testing/selftests/livepatch/test-kprobe.sh
+++ b/tools/testing/selftests/livepatch/test-kprobe.sh
@@ -20,11 +20,11 @@ start_test "livepatch interaction with kprobed function with post_handler"
echo 1 > "$SYSFS_KPROBES_DIR/enabled"
-load_mod $MOD_KPROBE has_post_handler=true
+load_mod $MOD_KPROBE has_post_handler=y
load_failing_mod $MOD_LIVEPATCH
unload_mod $MOD_KPROBE
-check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=true
+check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=y
% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
@@ -39,14 +39,14 @@ insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or
start_test "livepatch interaction with kprobed function without post_handler"
-load_mod $MOD_KPROBE has_post_handler=false
+load_mod $MOD_KPROBE has_post_handler=n
load_lp $MOD_LIVEPATCH
unload_mod $MOD_KPROBE
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=false
+check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=n
% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
index 58fe1d96997c..3b16285c6e67 100755
--- a/tools/testing/selftests/livepatch/test-sysfs.sh
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -8,6 +8,10 @@ MOD_LIVEPATCH=test_klp_livepatch
MOD_LIVEPATCH2=test_klp_callbacks_demo
MOD_LIVEPATCH3=test_klp_syscall
+HAS_PATCH_ATTR=0
+HAS_REPLACE_ATTR=0
+HAS_STACK_ORDER_ATTR=0
+
setup_config
# - load a livepatch and verifies the sysfs entries work as expected
@@ -20,13 +24,25 @@ check_sysfs_rights "$MOD_LIVEPATCH" "" "drwxr-xr-x"
check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--"
check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1"
check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------"
-check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
-check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--"
-check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--"
check_sysfs_value "$MOD_LIVEPATCH" "transition" "0"
-check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
-check_sysfs_value "$MOD_LIVEPATCH" "vmlinux/patched" "1"
+
+if does_sysfs_exist "$MOD_LIVEPATCH/vmlinux" "patched"; then
+ check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
+ check_sysfs_value "$MOD_LIVEPATCH" "vmlinux/patched" "1"
+ HAS_PATCH_ATTR=1
+fi
+
+if does_sysfs_exist "$MOD_LIVEPATCH" "replace"; then
+ check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+ HAS_REPLACE_ATTR=1
+fi
+
+if does_sysfs_exist "$MOD_LIVEPATCH" "stack_order"; then
+ check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--"
+ check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+ HAS_STACK_ORDER_ATTR=1
+fi
disable_lp $MOD_LIVEPATCH
@@ -45,123 +61,127 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-start_test "sysfs test object/patched"
+if [[ "$HAS_PATCH_ATTR" == "1" ]]; then
+ start_test "sysfs test object/patched"
-MOD_LIVEPATCH=test_klp_callbacks_demo
-MOD_TARGET=test_klp_callbacks_mod
-load_lp $MOD_LIVEPATCH
+ MOD_TARGET=test_klp_callbacks_mod
+ load_lp $MOD_LIVEPATCH2
-# check the "patch" file changes as target module loads/unloads
-check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
-load_mod $MOD_TARGET
-check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "1"
-unload_mod $MOD_TARGET
-check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
+ # check the "patch" file changes as target module loads/unloads
+ check_sysfs_value "$MOD_LIVEPATCH2" "$MOD_TARGET/patched" "0"
+ load_mod $MOD_TARGET
+ check_sysfs_value "$MOD_LIVEPATCH2" "$MOD_TARGET/patched" "1"
+ unload_mod $MOD_TARGET
+ check_sysfs_value "$MOD_LIVEPATCH2" "$MOD_TARGET/patched" "0"
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+ disable_lp $MOD_LIVEPATCH2
+ unload_lp $MOD_LIVEPATCH2
-check_result "% insmod test_modules/test_klp_callbacks_demo.ko
-livepatch: enabling patch 'test_klp_callbacks_demo'
-livepatch: 'test_klp_callbacks_demo': initializing patching transition
-test_klp_callbacks_demo: pre_patch_callback: vmlinux
-livepatch: 'test_klp_callbacks_demo': starting patching transition
-livepatch: 'test_klp_callbacks_demo': completing patching transition
-test_klp_callbacks_demo: post_patch_callback: vmlinux
-livepatch: 'test_klp_callbacks_demo': patching complete
-% insmod test_modules/test_klp_callbacks_mod.ko
-livepatch: applying patch 'test_klp_callbacks_demo' to loading module 'test_klp_callbacks_mod'
-test_klp_callbacks_demo: pre_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
-test_klp_callbacks_demo: post_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
-test_klp_callbacks_mod: test_klp_callbacks_mod_init
-% rmmod test_klp_callbacks_mod
-test_klp_callbacks_mod: test_klp_callbacks_mod_exit
-test_klp_callbacks_demo: pre_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away
-livepatch: reverting patch 'test_klp_callbacks_demo' on unloading module 'test_klp_callbacks_mod'
-test_klp_callbacks_demo: post_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away
-% echo 0 > $SYSFS_KLP_DIR/test_klp_callbacks_demo/enabled
-livepatch: 'test_klp_callbacks_demo': initializing unpatching transition
-test_klp_callbacks_demo: pre_unpatch_callback: vmlinux
-livepatch: 'test_klp_callbacks_demo': starting unpatching transition
-livepatch: 'test_klp_callbacks_demo': completing unpatching transition
-test_klp_callbacks_demo: post_unpatch_callback: vmlinux
-livepatch: 'test_klp_callbacks_demo': unpatching complete
-% rmmod test_klp_callbacks_demo"
-
-start_test "sysfs test replace enabled"
-
-MOD_LIVEPATCH=test_klp_atomic_replace
-load_lp $MOD_LIVEPATCH replace=1
-
-check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
-check_sysfs_value "$MOD_LIVEPATCH" "replace" "1"
+ check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% insmod test_modules/$MOD_TARGET.ko
+livepatch: applying patch '$MOD_LIVEPATCH2' to loading module '$MOD_TARGET'
+$MOD_LIVEPATCH2: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
+$MOD_LIVEPATCH2: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
+$MOD_TARGET: test_klp_callbacks_mod_init
+% rmmod $MOD_TARGET
+$MOD_TARGET: test_klp_callbacks_mod_exit
+$MOD_LIVEPATCH2: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
+livepatch: reverting patch '$MOD_LIVEPATCH2' on unloading module '$MOD_TARGET'
+$MOD_LIVEPATCH2: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2"
+fi
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+if [[ "$HAS_REPLACE_ATTR" == "1" ]]; then
+ start_test "sysfs test replace enabled"
-check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=1
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
-livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
-livepatch: '$MOD_LIVEPATCH': starting unpatching transition
-livepatch: '$MOD_LIVEPATCH': completing unpatching transition
-livepatch: '$MOD_LIVEPATCH': unpatching complete
-% rmmod $MOD_LIVEPATCH"
+ MOD_ATOMIC_REPLACE=test_klp_atomic_replace
+ load_lp $MOD_ATOMIC_REPLACE replace=1
-start_test "sysfs test replace disabled"
+ check_sysfs_rights "$MOD_ATOMIC_REPLACE" "replace" "-r--r--r--"
+ check_sysfs_value "$MOD_ATOMIC_REPLACE" "replace" "1"
-load_lp $MOD_LIVEPATCH replace=0
+ disable_lp $MOD_ATOMIC_REPLACE
+ unload_lp $MOD_ATOMIC_REPLACE
-check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
-check_sysfs_value "$MOD_LIVEPATCH" "replace" "0"
+ check_result "% insmod test_modules/$MOD_ATOMIC_REPLACE.ko replace=1
+livepatch: enabling patch '$MOD_ATOMIC_REPLACE'
+livepatch: '$MOD_ATOMIC_REPLACE': initializing patching transition
+livepatch: '$MOD_ATOMIC_REPLACE': starting patching transition
+livepatch: '$MOD_ATOMIC_REPLACE': completing patching transition
+livepatch: '$MOD_ATOMIC_REPLACE': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_ATOMIC_REPLACE/enabled
+livepatch: '$MOD_ATOMIC_REPLACE': initializing unpatching transition
+livepatch: '$MOD_ATOMIC_REPLACE': starting unpatching transition
+livepatch: '$MOD_ATOMIC_REPLACE': completing unpatching transition
+livepatch: '$MOD_ATOMIC_REPLACE': unpatching complete
+% rmmod $MOD_ATOMIC_REPLACE"
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+ start_test "sysfs test replace disabled"
-check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=0
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
-livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
-livepatch: '$MOD_LIVEPATCH': starting unpatching transition
-livepatch: '$MOD_LIVEPATCH': completing unpatching transition
-livepatch: '$MOD_LIVEPATCH': unpatching complete
-% rmmod $MOD_LIVEPATCH"
+ load_lp $MOD_ATOMIC_REPLACE replace=0
-start_test "sysfs test stack_order value"
+ check_sysfs_rights "$MOD_ATOMIC_REPLACE" "replace" "-r--r--r--"
+ check_sysfs_value "$MOD_ATOMIC_REPLACE" "replace" "0"
-load_lp $MOD_LIVEPATCH
+ disable_lp $MOD_ATOMIC_REPLACE
+ unload_lp $MOD_ATOMIC_REPLACE
-check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+ check_result "% insmod test_modules/$MOD_ATOMIC_REPLACE.ko replace=0
+livepatch: enabling patch '$MOD_ATOMIC_REPLACE'
+livepatch: '$MOD_ATOMIC_REPLACE': initializing patching transition
+livepatch: '$MOD_ATOMIC_REPLACE': starting patching transition
+livepatch: '$MOD_ATOMIC_REPLACE': completing patching transition
+livepatch: '$MOD_ATOMIC_REPLACE': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_ATOMIC_REPLACE/enabled
+livepatch: '$MOD_ATOMIC_REPLACE': initializing unpatching transition
+livepatch: '$MOD_ATOMIC_REPLACE': starting unpatching transition
+livepatch: '$MOD_ATOMIC_REPLACE': completing unpatching transition
+livepatch: '$MOD_ATOMIC_REPLACE': unpatching complete
+% rmmod $MOD_ATOMIC_REPLACE"
+fi
-load_lp $MOD_LIVEPATCH2
+if [[ "$HAS_STACK_ORDER_ATTR" == "1" ]]; then
+ start_test "sysfs test stack_order value"
-check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2"
+ load_lp $MOD_LIVEPATCH
-load_lp $MOD_LIVEPATCH3
+ check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
-check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3"
+ load_lp $MOD_LIVEPATCH2
-disable_lp $MOD_LIVEPATCH2
-unload_lp $MOD_LIVEPATCH2
+ check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2"
-check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
-check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2"
+ load_lp $MOD_LIVEPATCH3
-disable_lp $MOD_LIVEPATCH3
-unload_lp $MOD_LIVEPATCH3
+ check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3"
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+ disable_lp $MOD_LIVEPATCH2
+ unload_lp $MOD_LIVEPATCH2
-check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+ check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+ check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2"
+
+ disable_lp $MOD_LIVEPATCH3
+ unload_lp $MOD_LIVEPATCH3
+
+ disable_lp $MOD_LIVEPATCH
+ unload_lp $MOD_LIVEPATCH
+
+ check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
@@ -201,5 +221,6 @@ livepatch: '$MOD_LIVEPATCH': starting unpatching transition
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
+fi
exit 0
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
index dd802783ea84..08aacc0e14de 100644
--- a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
@@ -12,15 +12,26 @@
#include <linux/slab.h>
#include <linux/livepatch.h>
-#if defined(__x86_64__)
-#define FN_PREFIX __x64_
-#elif defined(__s390x__)
-#define FN_PREFIX __s390x_
-#elif defined(__aarch64__)
-#define FN_PREFIX __arm64_
+/*
+ * Before CONFIG_ARCH_HAS_SYSCALL_WRAPPER was introduced there were no
+ * prefixes for system calls.
+ * powerpc set this config based on configs, so it can be enabled or not.
+ */
+#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER)
+ #if defined(__x86_64__)
+ #define FN_PREFIX __x64_
+ #elif defined(__s390x__)
+ #define FN_PREFIX __s390x_
+ #elif defined(__aarch64__)
+ #define FN_PREFIX __arm64_
+ #elif defined(__powerpc__)
+ #define FN_PREFIX
+ #else
+ #error "Missing syscall wrapper for the given architecture."
+ #endif
#else
-/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER */
-#define FN_PREFIX
+ /* Do not set a prefix for architectures that do not enable wrappers. */
+ #define FN_PREFIX
#endif
/* Protects klp_pids */
@@ -98,7 +109,11 @@ static int livepatch_init(void)
*/
npids = npids_pending;
- return klp_enable_patch(&patch);
+ ret = klp_enable_patch(&patch);
+ if (ret)
+ kobject_put(klp_kobj);
+
+ return ret;
}
static void livepatch_exit(void)
diff --git a/tools/testing/selftests/liveupdate/Makefile b/tools/testing/selftests/liveupdate/Makefile
index 080754787ede..30689d22cb02 100644
--- a/tools/testing/selftests/liveupdate/Makefile
+++ b/tools/testing/selftests/liveupdate/Makefile
@@ -6,6 +6,8 @@ TEST_GEN_PROGS += liveupdate
TEST_GEN_PROGS_EXTENDED += luo_kexec_simple
TEST_GEN_PROGS_EXTENDED += luo_multi_session
+TEST_GEN_PROGS_EXTENDED += luo_stress_sessions
+TEST_GEN_PROGS_EXTENDED += luo_stress_files
TEST_FILES += do_kexec.sh
diff --git a/tools/testing/selftests/liveupdate/liveupdate.c b/tools/testing/selftests/liveupdate/liveupdate.c
index 37c808fbe1e9..502fb3567e38 100644
--- a/tools/testing/selftests/liveupdate/liveupdate.c
+++ b/tools/testing/selftests/liveupdate/liveupdate.c
@@ -26,6 +26,7 @@
#include <linux/liveupdate.h>
+#include "luo_test_utils.h"
#include "../kselftest.h"
#include "../kselftest_harness.h"
@@ -102,6 +103,22 @@ static int create_session(int lu_fd, const char *name)
return args.fd;
}
+/* Helper function to get a session name via ioctl. */
+static int get_session_name(int session_fd, char *name, size_t name_len)
+{
+ struct liveupdate_session_get_name args = {};
+
+ args.size = sizeof(args);
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_GET_NAME, &args))
+ return -errno;
+
+ strncpy(name, (char *)args.name, name_len - 1);
+ name[name_len - 1] = '\0';
+
+ return 0;
+}
+
/*
* Test Case: Create Duplicate Session
*
@@ -386,4 +403,175 @@ TEST_F(liveupdate_device, prevent_double_preservation)
ASSERT_EQ(close(session_fd2), 0);
}
+/*
+ * Test Case: Create Session with No Null Termination
+ *
+ * Verifies that filling the entire 64-byte name field with non-null characters
+ * (no '\0' terminator) is rejected by the kernel with EINVAL.
+ */
+TEST_F(liveupdate_device, create_session_no_null_termination)
+{
+ struct liveupdate_ioctl_create_session args = {};
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ /* Fill entire name field with 'X', no null terminator */
+ args.size = sizeof(args);
+ memset(args.name, 'X', sizeof(args.name));
+
+ EXPECT_LT(ioctl(self->fd1, LIVEUPDATE_IOCTL_CREATE_SESSION, &args), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+/*
+ * Test Case: Create Session with Empty Name
+ *
+ * Verifies that creating a session with an empty string name fails
+ * with EINVAL.
+ */
+TEST_F(liveupdate_device, create_session_empty_name)
+{
+ int session_fd;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "");
+ EXPECT_EQ(session_fd, -EINVAL);
+}
+
+/*
+ * Test Case: Get Session Name
+ *
+ * Verifies that the full session name can be retrieved from a session file
+ * descriptor via ioctl.
+ */
+TEST_F(liveupdate_device, get_session_name)
+{
+ char name_buf[LIVEUPDATE_SESSION_NAME_LENGTH] = {};
+ const char *session_name = "get-name-test-session";
+ int session_fd;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, session_name);
+ ASSERT_GE(session_fd, 0);
+
+ ASSERT_EQ(get_session_name(session_fd, name_buf, sizeof(name_buf)), 0);
+ ASSERT_STREQ(name_buf, session_name);
+
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+/*
+ * Test Case: Get Session Name at Maximum Length
+ *
+ * Verifies that a session name using the full LIVEUPDATE_SESSION_NAME_LENGTH
+ * (minus the null terminator) can be correctly retrieved.
+ */
+TEST_F(liveupdate_device, get_session_name_max_length)
+{
+ char name_buf[LIVEUPDATE_SESSION_NAME_LENGTH] = {};
+ char long_name[LIVEUPDATE_SESSION_NAME_LENGTH];
+ int session_fd;
+
+ memset(long_name, 'A', sizeof(long_name) - 1);
+ long_name[sizeof(long_name) - 1] = '\0';
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, long_name);
+ ASSERT_GE(session_fd, 0);
+
+ ASSERT_EQ(get_session_name(session_fd, name_buf, sizeof(name_buf)), 0);
+ ASSERT_STREQ(name_buf, long_name);
+
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+/*
+ * Test Case: Manage Many Sessions
+ *
+ * Verifies that a large number of sessions can be created and then
+ * destroyed during normal system operation. This specifically tests the
+ * dynamic block allocation and reuse logic for session metadata management
+ * without preserving any files.
+ */
+TEST_F(liveupdate_device, preserve_many_sessions)
+{
+#define MANY_SESSIONS 2000
+ int session_fds[MANY_SESSIONS];
+ int ret, i;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ ret = luo_ensure_nofile_limit(MANY_SESSIONS);
+ if (ret == -EPERM)
+ SKIP(return, "Insufficient privileges to set RLIMIT_NOFILE");
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < MANY_SESSIONS; i++) {
+ char name[64];
+
+ snprintf(name, sizeof(name), "many-session-%d", i);
+ session_fds[i] = create_session(self->fd1, name);
+ ASSERT_GE(session_fds[i], 0);
+ }
+
+ for (i = 0; i < MANY_SESSIONS; i++)
+ ASSERT_EQ(close(session_fds[i]), 0);
+}
+
+/*
+ * Test Case: Preserve Many Files
+ *
+ * Verifies that a large number of files can be preserved in a single session
+ * and then destroyed during normal system operation. This tests the dynamic
+ * block allocation and management for outgoing files.
+ */
+TEST_F(liveupdate_device, preserve_many_files)
+{
+#define MANY_FILES 500
+ int mem_fds[MANY_FILES];
+ int session_fd, ret, i;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "many-files-test");
+ ASSERT_GE(session_fd, 0);
+
+ ret = luo_ensure_nofile_limit(MANY_FILES + 10);
+ if (ret == -EPERM)
+ SKIP(return, "Insufficient privileges to set RLIMIT_NOFILE");
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < MANY_FILES; i++) {
+ mem_fds[i] = memfd_create("test-memfd", 0);
+ ASSERT_GE(mem_fds[i], 0);
+ ASSERT_EQ(preserve_fd(session_fd, mem_fds[i], i), 0);
+ }
+
+ for (i = 0; i < MANY_FILES; i++)
+ ASSERT_EQ(close(mem_fds[i]), 0);
+
+ ASSERT_EQ(close(session_fd), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/liveupdate/luo_stress_files.c b/tools/testing/selftests/liveupdate/luo_stress_files.c
new file mode 100644
index 000000000000..0cdf9cd4bac7
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_stress_files.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2026, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Validate that LUO can handle a large number of files per session across
+ * a kexec reboot.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include "luo_test_utils.h"
+
+#define NUM_FILES 500
+#define STATE_SESSION_NAME "kexec_many_files_state"
+#define STATE_MEMFD_TOKEN 9999
+#define TEST_SESSION_NAME "many_files_session"
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int session_fd, i;
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating test session '%s'...\n", TEST_SESSION_NAME);
+ session_fd = luo_create_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_create_session");
+
+ ksft_print_msg("[STAGE 1] Preserving %d files...\n", NUM_FILES);
+ for (i = 0; i < NUM_FILES; i++) {
+ char data[64];
+
+ snprintf(data, sizeof(data), "file-data-%d", i);
+ if (create_and_preserve_memfd(session_fd, i, data) < 0)
+ fail_exit("create_and_preserve_memfd for index %d", i);
+ }
+
+ ksft_print_msg("[STAGE 1] Successfully preserved %d files.\n", NUM_FILES);
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int session_fd;
+ int i, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2) {
+ fail_exit("Expected stage 2, but state file contains %d",
+ stage);
+ }
+
+ ksft_print_msg("[STAGE 2] Retrieving test session '%s'...\n", TEST_SESSION_NAME);
+ session_fd = luo_retrieve_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_retrieve_session");
+
+ ksft_print_msg("[STAGE 2] Verifying %d files...\n", NUM_FILES);
+ for (i = 0; i < NUM_FILES; i++) {
+ char data[64];
+ int fd;
+
+ snprintf(data, sizeof(data), "file-data-%d", i);
+ fd = restore_and_verify_memfd(session_fd, i, data);
+ if (fd < 0)
+ fail_exit("restore_and_verify_memfd for index %d", i);
+ close(fd);
+ }
+
+ ksft_print_msg("[STAGE 2] Finishing test session...\n");
+ if (luo_session_finish(session_fd) < 0)
+ fail_exit("luo_session_finish for test session");
+ close(session_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- MANY-FILES KEXEC TEST PASSED (%d files) ---\n",
+ NUM_FILES);
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_stress_sessions.c b/tools/testing/selftests/liveupdate/luo_stress_sessions.c
new file mode 100644
index 000000000000..f201b1839d1d
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_stress_sessions.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2026, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Validate that LUO can handle a large number of sessions across a kexec
+ * reboot.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include "luo_test_utils.h"
+
+#define NUM_SESSIONS 2000
+#define STATE_SESSION_NAME "kexec_many_state"
+#define STATE_MEMFD_TOKEN 999
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int ret, i;
+
+ ksft_print_msg("[STAGE 1] Increasing ulimit for open files...\n");
+ ret = luo_ensure_nofile_limit(NUM_SESSIONS);
+ if (ret == -EPERM)
+ ksft_exit_skip("Insufficient privileges to set RLIMIT_NOFILE\n");
+ if (ret < 0)
+ ksft_exit_fail_msg("luo_ensure_nofile_limit failed: %s\n", strerror(-ret));
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating %d sessions...\n", NUM_SESSIONS);
+
+ for (i = 0; i < NUM_SESSIONS; i++) {
+ char name[LIVEUPDATE_SESSION_NAME_LENGTH];
+ int s_fd;
+
+ snprintf(name, sizeof(name), "many-test-%d", i);
+ s_fd = luo_create_session(luo_fd, name);
+ if (s_fd < 0) {
+ fail_exit("luo_create_session for '%s' at index %d",
+ name, i);
+ }
+ }
+
+ ksft_print_msg("[STAGE 1] Successfully created %d sessions.\n",
+ NUM_SESSIONS);
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int i, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2) {
+ fail_exit("Expected stage 2, but state file contains %d",
+ stage);
+ }
+
+ ksft_print_msg("[STAGE 2] Retrieving and finishing %d sessions...\n",
+ NUM_SESSIONS);
+
+ for (i = 0; i < NUM_SESSIONS; i++) {
+ char name[LIVEUPDATE_SESSION_NAME_LENGTH];
+ int s_fd;
+
+ snprintf(name, sizeof(name), "many-test-%d", i);
+ s_fd = luo_retrieve_session(luo_fd, name);
+ if (s_fd < 0) {
+ fail_exit("luo_retrieve_session for '%s' at index %d",
+ name, i);
+ }
+
+ if (luo_session_finish(s_fd) < 0) {
+ fail_exit("luo_session_finish for '%s' at index %d",
+ name, i);
+ }
+ close(s_fd);
+ }
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- MANY-SESSIONS KEXEC TEST PASSED (%d sessions) ---\n",
+ NUM_SESSIONS);
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.c b/tools/testing/selftests/liveupdate/luo_test_utils.c
index 3c8721c505df..333a3530051b 100644
--- a/tools/testing/selftests/liveupdate/luo_test_utils.c
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.c
@@ -17,6 +17,7 @@
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/types.h>
+#include <sys/resource.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdarg.h>
@@ -28,6 +29,29 @@ int luo_open_device(void)
return open(LUO_DEVICE, O_RDWR);
}
+int luo_ensure_nofile_limit(long min_limit)
+{
+ struct rlimit hl;
+
+ /* Allow to extra files to be used by test itself */
+ min_limit += 32;
+
+ if (getrlimit(RLIMIT_NOFILE, &hl) < 0)
+ return -errno;
+
+ if (hl.rlim_cur >= min_limit)
+ return 0;
+
+ hl.rlim_cur = min_limit;
+ if (hl.rlim_cur > hl.rlim_max)
+ hl.rlim_max = hl.rlim_cur;
+
+ if (setrlimit(RLIMIT_NOFILE, &hl) < 0)
+ return -errno;
+
+ return 0;
+}
+
int luo_create_session(int luo_fd, const char *name)
{
struct liveupdate_ioctl_create_session arg = { .size = sizeof(arg) };
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.h b/tools/testing/selftests/liveupdate/luo_test_utils.h
index 90099bf49577..6a0d85386613 100644
--- a/tools/testing/selftests/liveupdate/luo_test_utils.h
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.h
@@ -26,6 +26,8 @@ int luo_create_session(int luo_fd, const char *name);
int luo_retrieve_session(int luo_fd, const char *name);
int luo_session_finish(int session_fd);
+int luo_ensure_nofile_limit(long min_limit);
+
int create_and_preserve_memfd(int session_fd, int token, const char *data);
int restore_and_verify_memfd(int session_fd, int token, const char *expected_data);
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index dbc171a3806d..510056c1b0d0 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -162,7 +162,7 @@ static void *global_p = NULL;
static int sealing_thread_fn(void *arg)
{
- int sig, r;
+ int r;
/*
* This thread first waits 200ms so any pending operation in the parent
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 2ca07ea7202a..cdab3a837624 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -688,9 +688,9 @@ static void mfd_assert_grow_write(int fd)
if (hugetlbfs_test)
return;
- buf = malloc(mfd_def_size * 8);
+ buf = calloc(1, mfd_def_size * 8);
if (!buf) {
- printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+ printf("calloc(1, %zu) failed: %m\n", mfd_def_size * 8);
abort();
}
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index b0c30c5ee9e3..9ccd9e1447e6 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -4,6 +4,10 @@ hugepage-mmap
hugepage-mremap
hugepage-shm
hugepage-vmemmap
+hugetlb-mmap
+hugetlb-mremap
+hugetlb-shm
+hugetlb-vmemmap
hugetlb-madvise
hugetlb-read-hwpoison
hugetlb-soft-offline
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index cd24596cdd27..e6df968f0971 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -61,16 +61,15 @@ TEST_GEN_FILES += gup_longterm
TEST_GEN_FILES += gup_test
TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugetlb-madvise
+TEST_GEN_FILES += hugetlb-mmap
+TEST_GEN_FILES += hugetlb-mremap
TEST_GEN_FILES += hugetlb-read-hwpoison
+TEST_GEN_FILES += hugetlb-shm
TEST_GEN_FILES += hugetlb-soft-offline
-TEST_GEN_FILES += hugepage-mmap
-TEST_GEN_FILES += hugepage-mremap
-TEST_GEN_FILES += hugepage-shm
-TEST_GEN_FILES += hugepage-vmemmap
+TEST_GEN_FILES += hugetlb-vmemmap
TEST_GEN_FILES += khugepaged
TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
-TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64))
TEST_GEN_FILES += memfd_secret
@@ -151,6 +150,7 @@ TEST_PROGS += ksft_gup_test.sh
TEST_PROGS += ksft_hmm.sh
TEST_PROGS += ksft_hugetlb.sh
TEST_PROGS += ksft_hugevm.sh
+TEST_PROGS += ksft_kmemleak_dedup.sh
TEST_PROGS += ksft_ksm.sh
TEST_PROGS += ksft_ksm_numa.sh
TEST_PROGS += ksft_madv_guard.sh
@@ -187,8 +187,8 @@ TEST_FILES += write_hugetlb_memory.sh
include ../lib.mk
-$(TEST_GEN_PROGS): vm_util.c thp_settings.c
-$(TEST_GEN_FILES): vm_util.c thp_settings.c
+$(TEST_GEN_PROGS): vm_util.c hugepage_settings.c
+$(TEST_GEN_FILES): vm_util.c hugepage_settings.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
@@ -216,7 +216,8 @@ ifeq ($(CAN_BUILD_I386),1)
$(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
- $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
@@ -224,7 +225,8 @@ ifeq ($(CAN_BUILD_X86_64),1)
$(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
- $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
@@ -261,7 +263,8 @@ $(OUTPUT)/migration: LDLIBS += -lnuma
$(OUTPUT)/rmap: LDLIBS += -lnuma
local_config.mk local_config.h: check_config.sh
- CC="$(CC)" CFLAGS="$(CFLAGS)" ./check_config.sh
+ $(call msg,CHK,config,$@)
+ $(Q)CC="$(CC)" CFLAGS="$(CFLAGS)" ./check_config.sh
EXTRA_CLEAN += local_config.mk local_config.h
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 44f4e703deb9..a1cfd3a349db 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -17,6 +17,7 @@ if ! command -v killall >/dev/null 2>&1; then
fi
nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+trap 'echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages' EXIT INT TERM
fault_limit_file=limit_in_bytes
reservation_limit_file=rsvd.limit_in_bytes
@@ -70,7 +71,6 @@ function cleanup() {
if [[ -e $cgroup_path/hugetlb_cgroup_test2 ]]; then
rmdir $cgroup_path/hugetlb_cgroup_test2
fi
- echo 0 >/proc/sys/vm/nr_hugepages
echo CLEANUP DONE
}
@@ -94,6 +94,15 @@ function get_machine_hugepage_size() {
}
MB=$(get_machine_hugepage_size)
+if (( MB >= 1024 )); then
+ # For 1GB hugepages
+ UNIT="GB"
+ MB_DISPLAY=$((MB / 1024))
+else
+ # For 2MB hugepages
+ UNIT="MB"
+ MB_DISPLAY=$MB
+fi
function setup_cgroup() {
local name="$1"
@@ -103,11 +112,12 @@ function setup_cgroup() {
mkdir $cgroup_path/$name
echo writing cgroup limit: "$cgroup_limit"
- echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file
+ echo "$cgroup_limit" > \
+ $cgroup_path/$name/hugetlb.${MB_DISPLAY}${UNIT}.$fault_limit_file
echo writing reservation limit: "$reservation_limit"
echo "$reservation_limit" > \
- $cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file
+ $cgroup_path/$name/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_limit_file
if [ -e "$cgroup_path/$name/cpuset.cpus" ]; then
echo 0 >$cgroup_path/$name/cpuset.cpus
@@ -142,7 +152,7 @@ function wait_for_file_value() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_usage_file"
wait_for_file_value "$path" "0"
}
@@ -150,7 +160,7 @@ function wait_for_hugetlb_memory_to_get_depleted() {
function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_usage_file"
wait_for_file_value "$path" "$size"
}
@@ -158,7 +168,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB_DISPLAY}${UNIT}.$fault_usage_file"
wait_for_file_value "$path" "$size"
}
@@ -180,8 +190,8 @@ function write_hugetlbfs_and_get_usage() {
hugetlb_difference=0
reserved_difference=0
- local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file
- local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file
+ local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB_DISPLAY}${UNIT}.$fault_usage_file
+ local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_usage_file
local hugetlb_before=$(cat $hugetlb_usage)
local reserved_before=$(cat $reserved_usage)
@@ -312,8 +322,10 @@ function run_test() {
cleanup_hugetlb_memory "hugetlb_cgroup_test"
- local final_hugetlb=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$fault_usage_file)
- local final_reservation=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$reservation_usage_file)
+ local final_hugetlb=$(cat \
+ $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB_DISPLAY}${UNIT}.$fault_usage_file)
+ local final_reservation=$(cat \
+ $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_usage_file)
echo $hugetlb_difference
echo $reserved_difference
@@ -369,10 +381,14 @@ function run_multiple_cgroup_test() {
reservation_failed1=$reservation_failed
oom_killed1=$oom_killed
- local cgroup1_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$fault_usage_file
- local cgroup1_reservation_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$reservation_usage_file
- local cgroup2_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$fault_usage_file
- local cgroup2_reservation_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$reservation_usage_file
+ local cgroup1_hugetlb_usage=\
+ $cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB_DISPLAY}${UNIT}.$fault_usage_file
+ local cgroup1_reservation_usage=\
+ $cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_usage_file
+ local cgroup2_hugetlb_usage=\
+ $cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB_DISPLAY}${UNIT}.$fault_usage_file
+ local cgroup2_reservation_usage=\
+ $cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB_DISPLAY}${UNIT}.$reservation_usage_file
local usage_before_second_write=$(cat $cgroup1_hugetlb_usage)
local reservation_usage_before_second_write=$(cat $cgroup1_reservation_usage)
@@ -599,4 +615,3 @@ if [[ $do_umount ]]; then
rmdir $cgroup_path
fi
-echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
diff --git a/tools/testing/selftests/mm/check_config.sh b/tools/testing/selftests/mm/check_config.sh
index b84c82bbf875..32beaefe279e 100755
--- a/tools/testing/selftests/mm/check_config.sh
+++ b/tools/testing/selftests/mm/check_config.sh
@@ -16,7 +16,7 @@ echo "#include <sys/types.h>" > $tmpfile_c
echo "#include <liburing.h>" >> $tmpfile_c
echo "int func(void) { return 0; }" >> $tmpfile_c
-$CC $CFLAGS -c $tmpfile_c -o $tmpfile_o
+$CC $CFLAGS -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
if [ -f $tmpfile_o ]; then
echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 30209c40b697..5b582588e015 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -17,6 +17,7 @@
#include <string.h>
#include "kselftest.h"
+#include "hugepage_settings.h"
#define MAP_SIZE_MB 100
#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
@@ -82,124 +83,44 @@ int prereq(void)
return -1;
}
-int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
- unsigned long initial_nr_hugepages)
+int check_compaction(unsigned long mem_free, unsigned long hugepage_size)
{
- unsigned long nr_hugepages_ul;
- int fd, ret = -1;
+ unsigned long nr_hugepages;
int compaction_index = 0;
- char nr_hugepages[20] = {0};
- char init_nr_hugepages[24] = {0};
- char target_nr_hugepages[24] = {0};
- int slen;
-
- snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
- "%lu", initial_nr_hugepages);
+ int ret = -1;
/* We want to test with 80% of available memory. Else, OOM killer comes
in to play */
mem_free = mem_free * 0.8;
- fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
- if (fd < 0) {
- ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- ret = -1;
- goto out;
- }
-
/*
* Request huge pages for about half of the free memory. The Kernel
* will allocate as much as it can, and we expect it will get at least 1/3
*/
- nr_hugepages_ul = mem_free / hugepage_size / 2;
- snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
- "%lu", nr_hugepages_ul);
-
- slen = strlen(target_nr_hugepages);
- if (write(fd, target_nr_hugepages, slen) != slen) {
- ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
- nr_hugepages_ul, strerror(errno));
- goto close_fd;
- }
-
- lseek(fd, 0, SEEK_SET);
-
- if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
- ksft_print_msg("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- goto close_fd;
- }
+ nr_hugepages = mem_free / hugepage_size / 2;
+ hugetlb_set_nr_default_pages(nr_hugepages);
/* We should have been able to request at least 1/3 rd of the memory in
huge pages */
- nr_hugepages_ul = strtoul(nr_hugepages, NULL, 10);
- if (!nr_hugepages_ul) {
+ nr_hugepages = hugetlb_nr_default_pages();
+ if (!nr_hugepages) {
ksft_print_msg("ERROR: No memory is available as huge pages\n");
- goto close_fd;
- }
- compaction_index = mem_free/(nr_hugepages_ul * hugepage_size);
-
- lseek(fd, 0, SEEK_SET);
-
- if (write(fd, init_nr_hugepages, strlen(init_nr_hugepages))
- != strlen(init_nr_hugepages)) {
- ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- goto close_fd;
+ goto out;
}
+ compaction_index = mem_free/(nr_hugepages * hugepage_size);
- ksft_print_msg("Number of huge pages allocated = %lu\n",
- nr_hugepages_ul);
+ ksft_print_msg("Number of huge pages allocated = %lu\n", nr_hugepages);
if (compaction_index > 3) {
ksft_print_msg("ERROR: Less than 1/%d of memory is available\n"
"as huge pages\n", compaction_index);
- goto close_fd;
- }
-
- ret = 0;
-
- close_fd:
- close(fd);
- out:
- ksft_test_result(ret == 0, "check_compaction\n");
- return ret;
-}
-
-int set_zero_hugepages(unsigned long *initial_nr_hugepages)
-{
- int fd, ret = -1;
- char nr_hugepages[20] = {0};
-
- fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
- if (fd < 0) {
- ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
goto out;
}
- if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
- ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- goto close_fd;
- }
-
- lseek(fd, 0, SEEK_SET);
-
- /* Start with the initial condition of 0 huge pages */
- if (write(fd, "0", sizeof(char)) != sizeof(char)) {
- ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- goto close_fd;
- }
- *initial_nr_hugepages = strtoul(nr_hugepages, NULL, 10);
ret = 0;
- close_fd:
- close(fd);
-
out:
+ ksft_test_result(ret == 0, "check_compaction\n");
return ret;
}
@@ -212,18 +133,17 @@ int main(int argc, char **argv)
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
long mem_fragmentable_MB = 0;
- unsigned long initial_nr_hugepages;
ksft_print_header();
if (prereq() || geteuid())
ksft_exit_skip("Prerequisites unsatisfied\n");
- ksft_set_plan(1);
-
/* Start the test without hugepages reducing mem_free */
- if (set_zero_hugepages(&initial_nr_hugepages))
- ksft_exit_fail();
+ if (!hugetlb_setup_default_exact(0))
+ ksft_exit_skip("Could not reset nr_hugepages\n");
+
+ ksft_set_plan(1);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
@@ -261,6 +181,9 @@ int main(int argc, char **argv)
mem_fragmentable_MB -= MAP_SIZE_MB;
}
+ /* Unmap every other entry in the list to create fragmentation with
+ * locked pages before invoking check_compaction().
+ */
for (entry = list; entry != NULL; entry = entry->next) {
munmap(entry->map, MAP_SIZE);
if (!entry->next)
@@ -268,8 +191,7 @@ int main(int argc, char **argv)
entry = entry->next;
}
- if (check_compaction(mem_free, hugepage_size,
- initial_nr_hugepages) == 0)
+ if (check_compaction(mem_free, hugepage_size) == 0)
ksft_exit_pass();
ksft_exit_fail();
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index d9c69c04b67d..0c627ea89ff7 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -29,7 +29,7 @@
#include "../../../../mm/gup_test.h"
#include "kselftest.h"
#include "vm_util.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
static size_t pagesize;
static int pagemap_fd;
@@ -37,7 +37,7 @@ static size_t pmdsize;
static int nr_thpsizes;
static size_t thpsizes[20];
static int nr_hugetlbsizes;
-static size_t hugetlbsizes[10];
+static unsigned long hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;
@@ -202,7 +202,7 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
- exit(fn(mem, size, &comm_pipes));
+ _exit(fn(mem, size, &comm_pipes));
}
while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
@@ -333,7 +333,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
;
/* Modify page content in the child. */
memset(mem, 0xff, size);
- exit(0);
+ _exit(0);
}
if (!before_fork) {
@@ -480,7 +480,7 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
write(comm_pipes.child_ready[1], "0", 1);
while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
;
- exit(0);
+ _exit(0);
}
while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
@@ -645,7 +645,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
write(comm_pipes.child_ready[1], "0", 1);
while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
;
- exit(0);
+ _exit(0);
}
/* Wait until our child is ready. */
@@ -956,7 +956,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
log_test_result(KSFT_FAIL);
goto munmap;
} else if (!ret) {
- exit(0);
+ _exit(0);
}
wait(&ret);
/* Allow for sharing all pages again. */
@@ -1347,13 +1347,13 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
switch (test) {
case ANON_THP_COLLAPSE_UNSHARED:
case ANON_THP_COLLAPSE_FULLY_SHARED:
- exit(child_memcmp_fn(mem, size, &comm_pipes));
+ _exit(child_memcmp_fn(mem, size, &comm_pipes));
break;
case ANON_THP_COLLAPSE_LOWER_SHARED:
- exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
+ _exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
break;
case ANON_THP_COLLAPSE_UPPER_SHARED:
- exit(child_memcmp_fn(mem + size / 2, size / 2,
+ _exit(child_memcmp_fn(mem + size / 2, size / 2,
&comm_pipes));
break;
default:
@@ -1881,21 +1881,21 @@ int main(int argc, char **argv)
ksft_print_header();
+ thp_save_settings();
+
pagesize = getpagesize();
pmdsize = read_pmd_pagesize();
if (pmdsize) {
/* Only if THP is supported. */
thp_read_settings(&default_settings);
default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
- thp_save_settings();
thp_push_settings(&default_settings);
ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
pmdsize / 1024);
nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
}
- nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
- ARRAY_SIZE(hugetlbsizes));
+ nr_hugetlbsizes = hugetlb_setup(2, hugetlbsizes, ARRAY_SIZE(hugetlbsizes));
has_huge_zeropage = detect_huge_zeropage();
ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
@@ -1911,10 +1911,5 @@ int main(int argc, char **argv)
run_anon_thp_test_cases();
run_non_anon_test_cases();
- if (pmdsize) {
- /* Only if THP is supported. */
- thp_restore_settings();
- }
-
ksft_finished();
}
diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c
index 44940f75c461..57e1b6fc5569 100644
--- a/tools/testing/selftests/mm/droppable.c
+++ b/tools/testing/selftests/mm/droppable.c
@@ -17,37 +17,50 @@
int main(int argc, char *argv[])
{
- size_t alloc_size = 134217728;
- size_t page_size = getpagesize();
+ const size_t alloc_size = 2 * 1024 * 1024;
+ int retry_count = 10;
+ bool dropped;
void *alloc;
- pid_t child;
ksft_print_header();
ksft_set_plan(1);
alloc = mmap(0, alloc_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0);
- assert(alloc != MAP_FAILED);
- memset(alloc, 'A', alloc_size);
- for (size_t i = 0; i < alloc_size; i += page_size)
- assert(*(uint8_t *)(alloc + i));
-
- child = fork();
- assert(child >= 0);
- if (!child) {
- for (;;)
- *(char *)malloc(page_size) = 'B';
+ if (alloc == MAP_FAILED) {
+ if ((errno == EOPNOTSUPP) || (errno == EINVAL)) {
+ ksft_test_result_skip("MAP_DROPPABLE not supported\n");
+ exit(KSFT_SKIP);
+ }
+ ksft_test_result_fail("mmap error: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
}
+ memset(alloc, 'A', alloc_size);
- for (bool done = false; !done;) {
- for (size_t i = 0; i < alloc_size; i += page_size) {
- if (!*(uint8_t *)(alloc + i)) {
- done = true;
- break;
+ while (retry_count--) {
+ if (madvise(alloc, alloc_size, MADV_PAGEOUT)) {
+ if (errno == EINVAL) {
+ ksft_test_result_skip("madvise(MADV_PAGEOUT) not supported\n");
+ exit(KSFT_SKIP);
}
+ ksft_test_result_fail("madvise(MADV_PAGEOUT) error: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
}
+
+ dropped = memchr(alloc, 'A', alloc_size) == NULL;
+
+ /*
+ * Speculative reference can temporarily prevent some
+ * pages from getting dropped. So sleep and retry.
+ *
+ * If a page is not droppable for 10s, something
+ * is seriously messed up and we want to fail.
+ */
+ if (dropped)
+ break;
+ sleep(1);
}
- kill(child, SIGTERM);
- ksft_test_result_pass("MAP_DROPPABLE: PASS\n");
- exit(KSFT_PASS);
+ ksft_test_result(dropped, "madvise(MADV_PAGEOUT) behavior\n");
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/folio_split_race_test.c b/tools/testing/selftests/mm/folio_split_race_test.c
index ff026f183ac7..6329e37fff4c 100644
--- a/tools/testing/selftests/mm/folio_split_race_test.c
+++ b/tools/testing/selftests/mm/folio_split_race_test.c
@@ -25,7 +25,7 @@
#include <unistd.h>
#include "vm_util.h"
#include "kselftest.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
uint64_t page_size;
uint64_t pmd_pagesize;
@@ -226,23 +226,6 @@ static uint64_t run_iteration(void)
return reader_failures;
}
-static void thp_cleanup_handler(int signum)
-{
- thp_restore_settings();
- /*
- * Restore default handler and re-raise the signal to exit.
- * This is to ensure the test process exits with the correct
- * status code corresponding to the signal.
- */
- signal(signum, SIG_DFL);
- raise(signum);
-}
-
-static void thp_settings_cleanup(void)
-{
- thp_restore_settings();
-}
-
int main(void)
{
struct thp_settings current_settings;
@@ -261,12 +244,6 @@ int main(void)
ksft_exit_skip("Please run the test as root\n");
thp_save_settings();
- /* make sure thp settings are restored */
- if (atexit(thp_settings_cleanup) != 0)
- ksft_exit_fail_msg("atexit failed\n");
-
- signal(SIGINT, thp_cleanup_handler);
- signal(SIGTERM, thp_cleanup_handler);
thp_read_settings(&current_settings);
current_settings.shmem_enabled = SHMEM_ADVISE;
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index 48e8b1539be3..b21df3040b1c 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -21,7 +21,7 @@
#include <sys/uio.h>
#include <unistd.h>
#include "vm_util.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#include "../pidfd/pidfd.h"
@@ -2203,17 +2203,6 @@ TEST_F(guard_regions, collapse)
if (variant->backing != ANON_BACKED)
ASSERT_EQ(ftruncate(self->fd, size), 0);
- /*
- * We must close and re-open local-file backed as read-only for
- * CONFIG_READ_ONLY_THP_FOR_FS to work.
- */
- if (variant->backing == LOCAL_FILE_BACKED) {
- ASSERT_EQ(close(self->fd), 0);
-
- self->fd = open(self->path, O_RDONLY);
- ASSERT_GE(self->fd, 0);
- }
-
ptr = mmap_(self, variant, NULL, size, PROT_READ, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
@@ -2237,9 +2226,10 @@ TEST_F(guard_regions, collapse)
/*
* Now collapse the entire region. This should fail in all cases.
*
- * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is
- * not set for the local file case, but we can't differentiate whether
- * this occurred or if the collapse was rightly rejected.
+ * The madvise() call will also fail if the file system does not support
+ * large folio or the supported orders do not include PMD_ORDER for the
+ * local file case, but we can't differentiate whether this occurred or
+ * if the collapse was rightly rejected.
*/
EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0);
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index f61150d28eb2..eb8963e9d98f 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -29,10 +29,11 @@
#include "../../../../mm/gup_test.h"
#include "kselftest.h"
#include "vm_util.h"
+#include "hugepage_settings.h"
static size_t pagesize;
static int nr_hugetlbsizes;
-static size_t hugetlbsizes[10];
+static unsigned long hugetlbsizes[10];
static int gup_fd;
static __fsword_t get_fs_type(int fd)
@@ -509,7 +510,7 @@ int main(int argc, char **argv)
int i;
pagesize = getpagesize();
- nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ nr_hugetlbsizes = hugetlb_setup(2, hugetlbsizes,
ARRAY_SIZE(hugetlbsizes));
ksft_print_header();
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index fb8f9ae49efa..3f841a96f870 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -14,6 +14,7 @@
#include <mm/gup_test.h>
#include "kselftest.h"
#include "vm_util.h"
+#include "hugepage_settings.h"
#define MB (1UL << 20)
@@ -94,6 +95,7 @@ int main(int argc, char **argv)
int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
int flags = MAP_PRIVATE;
char *file = "/dev/zero";
+ bool hugetlb = false;
pthread_t *tid;
char *p;
@@ -168,6 +170,7 @@ int main(int argc, char **argv)
break;
case 'H':
flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
+ hugetlb = true;
break;
default:
ksft_exit_fail_msg("Wrong argument\n");
@@ -199,6 +202,18 @@ int main(int argc, char **argv)
}
ksft_print_header();
+
+ if (hugetlb) {
+ unsigned long hp_size = default_huge_page_size();
+
+ if (!hp_size)
+ ksft_exit_skip("HugeTLB is unavailable\n");
+
+ size = (size + hp_size - 1) & ~(hp_size - 1);
+ if (!hugetlb_setup_default(size / hp_size))
+ ksft_exit_skip("Not enough huge pages\n");
+ }
+
ksft_set_plan(nthreads);
filed = open(file, O_RDWR|O_CREAT, 0664);
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 77fb4c5d871b..e4c49699f3f7 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -11,6 +11,7 @@
*/
#include "kselftest_harness.h"
+#include "hugepage_settings.h"
#include <errno.h>
#include <fcntl.h>
@@ -21,13 +22,13 @@
#include <strings.h>
#include <time.h>
#include <pthread.h>
+#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/time.h>
-
/*
* This is a private UAPI to the kernel test module so it isn't exported
* in the usual include/uapi/... directory.
@@ -69,6 +70,9 @@ enum {
#ifndef FOLL_LONGTERM
#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */
#endif
+
+HUGETLB_SETUP_DEFAULT_PAGES(1)
+
FIXTURE(hmm)
{
int fd;
@@ -632,7 +636,7 @@ TEST_F(hmm, anon_write_child)
}
close(child_fd);
- exit(0);
+ _exit(0);
}
}
}
@@ -712,7 +716,7 @@ TEST_F(hmm, anon_write_child_shared)
ASSERT_EQ(ptr[i], -i);
close(child_fd);
- exit(0);
+ _exit(0);
}
/*
@@ -784,8 +788,8 @@ TEST_F(hmm, anon_write_hugetlbfs)
int *ptr;
int ret;
- if (!default_hsize)
- SKIP(return, "Huge page size could not be determined");
+ if (!hugetlb_free_default_pages())
+ SKIP(return, "Not enough huge pages");
size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
@@ -1599,8 +1603,8 @@ TEST_F(hmm2, snapshot)
}
/*
- * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
- * should be mapped by a large page table entry.
+ * Test the hmm_range_fault() handling of large pages (PMD or PUD)
+ * that should be mapped by a large page table entry.
*/
TEST_F(hmm, compound)
{
@@ -1610,13 +1614,13 @@ TEST_F(hmm, compound)
unsigned long default_hsize = default_huge_page_size();
int *ptr;
unsigned char *m;
+ unsigned char prot;
int ret;
unsigned long i;
/* Skip test if we can't allocate a hugetlbfs page. */
-
- if (!default_hsize)
- SKIP(return, "Huge page size could not be determined");
+ if (!hugetlb_free_default_pages())
+ SKIP(return, "Not enough huge pages");
size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
@@ -1646,11 +1650,20 @@ TEST_F(hmm, compound)
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
- /* Check what the device saw. */
+ /*
+ * Check what the device saw. The region is backed by a single huge
+ * page that the device reports either at PMD or at PUD level depending
+ * on the configured default hugepage size. Determine that level from
+ * the first page and require every page in the range to match it
+ * exactly, so that a fragmented mapping mixing levels (or a missing
+ * large-page bit) is still caught and reported with its actual value.
+ */
m = buffer->mirror;
+ prot = HMM_DMIRROR_PROT_WRITE |
+ ((m[0] & HMM_DMIRROR_PROT_PUD) ? HMM_DMIRROR_PROT_PUD :
+ HMM_DMIRROR_PROT_PMD);
for (i = 0; i < npages; ++i)
- ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE |
- HMM_DMIRROR_PROT_PMD);
+ ASSERT_EQ(m[i], prot);
/* Make the region read-only. */
ret = mprotect(buffer->ptr, size, PROT_READ);
@@ -1661,11 +1674,17 @@ TEST_F(hmm, compound)
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
- /* Check what the device saw. */
+ /*
+ * Check what the device saw after mprotect(PROT_READ). Same
+ * approach as above: determine the mapping level from the first
+ * page and require every page to match it exactly.
+ */
m = buffer->mirror;
+ prot = HMM_DMIRROR_PROT_READ |
+ ((m[0] & HMM_DMIRROR_PROT_PUD) ? HMM_DMIRROR_PROT_PUD :
+ HMM_DMIRROR_PROT_PMD);
for (i = 0; i < npages; ++i)
- ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
- HMM_DMIRROR_PROT_PMD);
+ ASSERT_EQ(m[i], prot);
munmap(buffer->ptr, buffer->size);
buffer->ptr = NULL;
@@ -1865,6 +1884,8 @@ TEST_F(hmm, exclusive_cow)
unsigned long i;
int *ptr;
int ret;
+ pid_t pid;
+ int status;
npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
ASSERT_NE(npages, 0);
@@ -1893,14 +1914,37 @@ TEST_F(hmm, exclusive_cow)
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
- fork();
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
- /* Fault pages back to system memory and check them. */
+ if (pid == 0) {
+ /*
+ * Child verifies COW independently, then _exit(0)s so it does
+ * not run the test teardown. A failed ASSERT_* here makes the
+ * harness abort() the child, so the parent sees
+ * !WIFEXITED(status) below and fails in turn.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i + 1);
+
+ _exit(0);
+ }
+
+ /* Parent: also increment to verify COW works for both processes. */
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i]++, i);
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i+1);
+ ASSERT_EQ(ptr[i], i + 1);
+
+ /* Parent: wait for child and then free the buffer. */
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
hmm_buffer_free(buffer);
}
@@ -2062,7 +2106,7 @@ TEST_F(hmm, hmm_cow_in_device)
if (pid == -1)
ASSERT_EQ(pid, 0);
if (!pid) {
- /* Child process waits for SIGTERM from the parent. */
+ /* Child process waits for SIGKILL from the parent. */
while (1) {
}
/* Should not reach this */
@@ -2075,10 +2119,10 @@ TEST_F(hmm, hmm_cow_in_device)
ptr[i] = i;
/* Terminate child and wait */
- EXPECT_EQ(0, kill(pid, SIGTERM));
+ EXPECT_EQ(0, kill(pid, SIGKILL));
EXPECT_EQ(pid, waitpid(pid, &status, 0));
EXPECT_NE(0, WIFSIGNALED(status));
- EXPECT_EQ(SIGTERM, WTERMSIG(status));
+ EXPECT_EQ(SIGKILL, WTERMSIG(status));
/* Take snapshot to CPU pagetables */
ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
@@ -2274,8 +2318,11 @@ TEST_F(hmm, migrate_anon_huge_fault)
unsigned long npages;
unsigned long size;
unsigned long i;
+ unsigned char *m;
+ uint64_t entry;
void *old_ptr;
void *map;
+ int pagemap_fd;
int *ptr;
int ret;
@@ -2298,8 +2345,6 @@ TEST_F(hmm, migrate_anon_huge_fault)
npages = size >> self->page_shift;
map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
- ret = madvise(map, size, MADV_HUGEPAGE);
- ASSERT_EQ(ret, 0);
old_ptr = buffer->ptr;
buffer->ptr = map;
@@ -2307,6 +2352,9 @@ TEST_F(hmm, migrate_anon_huge_fault)
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ptr[i] = i;
+ ret = madvise(map, size, MADV_COLLAPSE);
+ ASSERT_EQ(ret, 0);
+
/* Migrate memory to device. */
ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
@@ -2316,6 +2364,32 @@ TEST_F(hmm, migrate_anon_huge_fault)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
+ if (!hmm_is_coherent_type(variant->device_number)) {
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT,
+ buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE |
+ HMM_DMIRROR_PROT_PMD);
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_GE(pagemap_fd, 0);
+
+ for (i = 0; i < npages; ++i) {
+ entry = pagemap_get_entry(pagemap_fd,
+ (char *)buffer->ptr + i * self->page_size);
+
+ ASSERT_NE(entry & PM_SWAP, 0);
+ ASSERT_FALSE(PAGEMAP_PRESENT(entry));
+ }
+
+ close(pagemap_fd);
+ }
+
/* Fault pages back to system memory and check them. */
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
@@ -2332,12 +2406,21 @@ TEST_F(hmm, migrate_partial_unmap_fault)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size = read_pmd_pagesize();
+ unsigned long unmap_size;
+ unsigned long offsets[3];
unsigned long i;
void *old_ptr;
void *map;
int *ptr;
int ret, j, use_thp;
- int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ if (!size)
+ size = TWOMEG;
+
+ unmap_size = size / 2;
+ offsets[0] = 0;
+ offsets[1] = size / 4;
+ offsets[2] = size / 2;
for (use_thp = 0; use_thp < 2; ++use_thp) {
for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
@@ -2379,12 +2462,12 @@ TEST_F(hmm, migrate_partial_unmap_fault)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- munmap(buffer->ptr + offsets[j], ONEMEG);
+ munmap(buffer->ptr + offsets[j], unmap_size);
/* Fault pages back to system memory and check them. */
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
if (i * sizeof(int) < offsets[j] ||
- i * sizeof(int) >= offsets[j] + ONEMEG)
+ i * sizeof(int) >= offsets[j] + unmap_size)
ASSERT_EQ(ptr[i], i);
buffer->ptr = old_ptr;
@@ -2398,12 +2481,19 @@ TEST_F(hmm, migrate_remap_fault)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size = read_pmd_pagesize();
+ unsigned long offsets[3];
unsigned long i;
void *old_ptr, *new_ptr = NULL;
void *map;
int *ptr;
int ret, j, use_thp, dont_unmap, before;
- int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ if (!size)
+ size = TWOMEG;
+
+ offsets[0] = 0;
+ offsets[1] = size / 4;
+ offsets[2] = size / 2;
for (before = 0; before < 2; ++before) {
for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
@@ -2738,7 +2828,7 @@ static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_siz
buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (!buffer->ptr)
+ if (buffer->ptr == MAP_FAILED)
return -1;
/* Apply THP hint if requested */
@@ -2806,38 +2896,45 @@ static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_siz
TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
{
struct benchmark_results thp_results, regular_results;
- size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */
+ size_t thp_size = read_pmd_pagesize();
int iterations = 5;
+ if (!thp_size)
+ thp_size = TWOMEG;
+
printf("\nHMM THP Migration Benchmark\n");
printf("---------------------------\n");
printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
/* Test different buffer sizes */
size_t test_sizes[] = {
- thp_size / 4, /* 512KB - smaller than THP */
- thp_size / 2, /* 1MB - half THP */
- thp_size, /* 2MB - single THP */
- thp_size * 2, /* 4MB - two THPs */
- thp_size * 4, /* 8MB - four THPs */
- thp_size * 8, /* 16MB - eight THPs */
- thp_size * 128, /* 256MB - one twenty eight THPs */
+ thp_size / 4, /* quarter THP */
+ thp_size / 2, /* half THP */
+ thp_size, /* single THP */
+ thp_size * 2, /* two THPs */
+ thp_size * 4, /* four THPs */
+ thp_size * 8, /* eight THPs */
+ thp_size * 128, /* one twenty eight THPs */
};
static const char *const test_names[] = {
- "Small Buffer (512KB)",
- "Half THP Size (1MB)",
- "Single THP Size (2MB)",
- "Two THP Size (4MB)",
- "Four THP Size (8MB)",
- "Eight THP Size (16MB)",
- "One twenty eight THP Size (256MB)"
+ "Small Buffer",
+ "Half THP Size",
+ "Single THP Size",
+ "Two THP Size",
+ "Four THP Size",
+ "Eight THP Size",
+ "One twenty eight THP Size"
};
int num_tests = ARRAY_SIZE(test_sizes);
/* Run all tests */
for (int i = 0; i < num_tests; i++) {
+ /* Skip test sizes exceeding INT_MAX to avoid overflow */
+ if (test_sizes[i] > INT_MAX)
+ break;
+
/* Test with THP */
ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
iterations, &thp_results), 0);
diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c
deleted file mode 100644
index d543419de040..000000000000
--- a/tools/testing/selftests/mm/hugepage-mmap.c
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * hugepage-mmap:
- *
- * Example of using huge page memory in a user application using the mmap
- * system call. Before running this application, make sure that the
- * administrator has mounted the hugetlbfs filesystem (on some directory
- * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
- * example, the app is requesting memory of size 256MB that is backed by
- * huge pages.
- */
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include "kselftest.h"
-
-#define LENGTH (256UL*1024*1024)
-#define PROTECTION (PROT_READ | PROT_WRITE)
-
-static void check_bytes(char *addr)
-{
- ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
-}
-
-static void write_bytes(char *addr)
-{
- unsigned long i;
-
- for (i = 0; i < LENGTH; i++)
- *(addr + i) = (char)i;
-}
-
-static int read_bytes(char *addr)
-{
- unsigned long i;
-
- check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
- if (*(addr + i) != (char)i) {
- ksft_print_msg("Error: Mismatch at %lu\n", i);
- return 1;
- }
- return 0;
-}
-
-int main(void)
-{
- void *addr;
- int fd, ret;
-
- ksft_print_header();
- ksft_set_plan(1);
-
- fd = memfd_create("hugepage-mmap", MFD_HUGETLB);
- if (fd < 0)
- ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno));
-
- addr = mmap(NULL, LENGTH, PROTECTION, MAP_SHARED, fd, 0);
- if (addr == MAP_FAILED) {
- close(fd);
- ksft_exit_fail_msg("mmap(): %s\n", strerror(errno));
- }
-
- ksft_print_msg("Returned address is %p\n", addr);
- check_bytes(addr);
- write_bytes(addr);
- ret = read_bytes(addr);
-
- munmap(addr, LENGTH);
- close(fd);
-
- ksft_test_result(!ret, "Read same data\n");
-
- ksft_exit(!ret);
-}
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/hugepage_settings.c
index e748ebfb3d4e..2eab2110ac6a 100644
--- a/tools/testing/selftests/mm/thp_settings.c
+++ b/tools/testing/selftests/mm/hugepage_settings.c
@@ -1,13 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
#include <fcntl.h>
#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "vm_util.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define MAX_SETTINGS_DEPTH 4
@@ -15,6 +18,7 @@ static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH];
static int settings_index;
static struct thp_settings saved_settings;
static char dev_queue_read_ahead_path[PATH_MAX];
+static bool thp_settings_saved;
static const char * const thp_enabled_strings[] = {
"never",
@@ -44,47 +48,6 @@ static const char * const shmem_enabled_strings[] = {
NULL
};
-int read_file(const char *path, char *buf, size_t buflen)
-{
- int fd;
- ssize_t numread;
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return 0;
-
- numread = read(fd, buf, buflen - 1);
- if (numread < 1) {
- close(fd);
- return 0;
- }
-
- buf[numread] = '\0';
- close(fd);
-
- return (unsigned int) numread;
-}
-
-unsigned long read_num(const char *path)
-{
- char buf[21];
-
- if (read_file(path, buf, sizeof(buf)) < 0) {
- perror("read_file()");
- exit(EXIT_FAILURE);
- }
-
- return strtoul(buf, NULL, 10);
-}
-
-void write_num(const char *path, unsigned long num)
-{
- char buf[21];
-
- sprintf(buf, "%ld", num);
- write_file(path, buf, strlen(buf) + 1);
-}
-
int thp_read_string(const char *name, const char * const strings[])
{
char path[PATH_MAX];
@@ -298,12 +261,20 @@ void thp_pop_settings(void)
void thp_restore_settings(void)
{
- thp_write_settings(&saved_settings);
+ if (thp_settings_saved)
+ thp_write_settings(&saved_settings);
}
-void thp_save_settings(void)
+static void __thp_save_settings(void)
{
+ if (!thp_available())
+ return;
+
+ if (thp_settings_saved)
+ return;
+
thp_read_settings(&saved_settings);
+ thp_settings_saved = true;
}
void thp_set_read_ahead_path(char *path)
@@ -370,3 +341,260 @@ bool thp_is_enabled(void)
/* THP is considered enabled if it's either "always" or "madvise" */
return mode == 1 || mode == 3;
}
+
+#define HUGETLB_MAX_NR_PAGESIZES 10
+struct hugetlb_settings {
+ unsigned long nr_hugepages[HUGETLB_MAX_NR_PAGESIZES];
+ unsigned long sizes[HUGETLB_MAX_NR_PAGESIZES];
+ unsigned long default_size;
+ int nr_sizes;
+};
+
+static struct hugetlb_settings hugetlb_saved_settings;
+static bool hugetlb_settings_saved;
+
+int detect_hugetlb_page_sizes(unsigned long sizes[], int max)
+{
+ static struct hugetlb_settings *settings = &hugetlb_saved_settings;
+ DIR *dir;
+ int count = 0;
+
+ if (settings->nr_sizes) {
+ if (settings->nr_sizes < max)
+ max = settings->nr_sizes;
+ for (count = 0; count < max; count++)
+ sizes[count] = settings->sizes[count];
+ return count;
+ }
+
+ dir = opendir("/sys/kernel/mm/hugepages/");
+ if (!dir)
+ return 0;
+
+ while (count < max) {
+ struct dirent *entry = readdir(dir);
+ size_t kb;
+
+ if (!entry)
+ break;
+ if (entry->d_type != DT_DIR)
+ continue;
+ if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
+ continue;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
+ kb);
+ }
+ closedir(dir);
+ return count;
+}
+
+unsigned long default_huge_page_size(void)
+{
+ static struct hugetlb_settings *settings = &hugetlb_saved_settings;
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f;
+
+ if (settings->default_size)
+ return settings->default_size;
+
+ f = fopen("/proc/meminfo", "r");
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+static void hugetlb_sysfs_path(char *buf, size_t buflen,
+ unsigned long size, const char *attr)
+{
+ snprintf(buf, buflen, "/sys/kernel/mm/hugepages/hugepages-%lukB/%s",
+ size / 1024, attr);
+}
+
+unsigned long hugetlb_nr_pages(unsigned long size)
+{
+ char path[PATH_MAX];
+
+ hugetlb_sysfs_path(path, sizeof(path), size, "nr_hugepages");
+
+ return read_num(path);
+}
+
+void hugetlb_set_nr_pages(unsigned long size, unsigned long nr)
+{
+ char path[PATH_MAX];
+
+ hugetlb_sysfs_path(path, sizeof(path), size, "nr_hugepages");
+
+ write_num(path, nr);
+}
+
+unsigned long hugetlb_free_pages(unsigned long size)
+{
+ char path[PATH_MAX];
+
+ hugetlb_sysfs_path(path, sizeof(path), size, "free_hugepages");
+
+ return read_num(path);
+}
+
+static bool __hugetlb_setup(unsigned long size, unsigned long nr)
+{
+ unsigned long free = hugetlb_free_pages(size);
+ unsigned long total = hugetlb_nr_pages(size);
+
+ if (free >= nr)
+ return true;
+
+ hugetlb_set_nr_pages(size, total + (nr - free));
+
+ return hugetlb_free_pages(size) >= nr;
+}
+
+bool hugetlb_setup_default(unsigned long nr)
+{
+ unsigned long size;
+
+ hugetlb_save_settings();
+ size = default_huge_page_size();
+ if (!size)
+ return false;
+
+ return __hugetlb_setup(size, nr);
+}
+
+bool hugetlb_setup_default_exact(unsigned long nr)
+{
+ unsigned long size;
+
+ hugetlb_save_settings();
+ size = default_huge_page_size();
+ if (!size)
+ return false;
+
+ hugetlb_set_nr_pages(size, nr);
+
+ return hugetlb_free_pages(size) == nr;
+}
+
+unsigned long hugetlb_setup(unsigned long nr, unsigned long sizes[],
+ int max)
+{
+ unsigned long enabled[10];
+ int nr_sizes = 0;
+ int nr_enabled;
+
+ hugetlb_save_settings();
+
+ nr_enabled = detect_hugetlb_page_sizes(enabled, ARRAY_SIZE(enabled));
+ if (!nr_enabled)
+ return 0;
+
+ if (nr_enabled > max) {
+ ksft_print_msg("detected %d huge page sizes, will only test %d\n", nr_enabled, max);
+ nr_enabled = max;
+ }
+
+ /* request nr HugeTLB pages of every size. */
+ for (int i = 0; i < nr_enabled; i++) {
+ if (!__hugetlb_setup(enabled[i], nr))
+ continue;
+ sizes[nr_sizes++] = enabled[i];
+ }
+
+ return nr_sizes;
+}
+
+static void __hugetlb_save_settings(void)
+{
+ struct hugetlb_settings *settings = &hugetlb_saved_settings;
+ int nr_sizes;
+
+ if (hugetlb_settings_saved)
+ return;
+
+ settings->default_size = default_huge_page_size();
+ if (!settings->default_size)
+ return;
+
+ nr_sizes = detect_hugetlb_page_sizes(settings->sizes,
+ HUGETLB_MAX_NR_PAGESIZES);
+ if (!nr_sizes) {
+ settings->default_size = 0;
+ return;
+ }
+
+ for (int i = 0; i < nr_sizes; i++) {
+ unsigned long sz = settings->sizes[i];
+
+ if (!sz)
+ continue;
+ settings->nr_hugepages[i] = hugetlb_nr_pages(sz);
+ }
+
+ settings->nr_sizes = nr_sizes;
+ hugetlb_settings_saved = true;
+}
+
+void hugetlb_restore_settings(void)
+{
+ struct hugetlb_settings *settings = &hugetlb_saved_settings;
+
+ if (!hugetlb_settings_saved || !settings->default_size)
+ return;
+
+ for (int i = 0; i < HUGETLB_MAX_NR_PAGESIZES; i++) {
+ unsigned long sz = settings->sizes[i];
+
+ if (!sz)
+ continue;
+
+ hugetlb_set_nr_pages(sz, settings->nr_hugepages[i]);
+ }
+}
+
+static void hugepage_restore_settings_atexit(void)
+{
+ if (thp_settings_saved)
+ thp_restore_settings();
+ if (hugetlb_settings_saved)
+ hugetlb_restore_settings();
+}
+
+static void hugepage_restore_settings_sighandler(int sig)
+{
+ /* exit() will invoke the hugepage_restore_settings_atexit handler. */
+ exit(KSFT_FAIL);
+}
+
+void hugepage_save_settings(bool thp, bool hugetlb)
+{
+ if (!thp && !hugetlb)
+ return;
+
+ if (thp)
+ __thp_save_settings();
+ if (hugetlb)
+ __hugetlb_save_settings();
+
+ /*
+ * setup exit hooks to make sure THP and HugeTLB settings are
+ * restored on graceful and error exits and signals
+ */
+ atexit(hugepage_restore_settings_atexit);
+ signal(SIGTERM, hugepage_restore_settings_sighandler);
+ signal(SIGINT, hugepage_restore_settings_sighandler);
+ signal(SIGHUP, hugepage_restore_settings_sighandler);
+ signal(SIGQUIT, hugepage_restore_settings_sighandler);
+}
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/hugepage_settings.h
index 7748a9009191..726c73c43c05 100644
--- a/tools/testing/selftests/mm/thp_settings.h
+++ b/tools/testing/selftests/mm/hugepage_settings.h
@@ -1,11 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __THP_SETTINGS_H__
-#define __THP_SETTINGS_H__
+#ifndef __HUGEPAGE_SETTINGS_H__
+#define __HUGEPAGE_SETTINGS_H__
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
+void hugepage_save_settings(bool thp, bool hugetlb);
+
+/* Transparent Huge Pages (THP) */
+
enum thp_enabled {
THP_NEVER,
THP_ALWAYS,
@@ -62,10 +66,6 @@ struct thp_settings {
struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS];
};
-int read_file(const char *path, char *buf, size_t buflen);
-unsigned long read_num(const char *path);
-void write_num(const char *path, unsigned long num);
-
int thp_read_string(const char *name, const char * const strings[]);
void thp_write_string(const char *name, const char *val);
unsigned long thp_read_num(const char *name);
@@ -77,7 +77,11 @@ struct thp_settings *thp_current_settings(void);
void thp_push_settings(struct thp_settings *settings);
void thp_pop_settings(void);
void thp_restore_settings(void);
-void thp_save_settings(void);
+
+static inline void thp_save_settings(void)
+{
+ hugepage_save_settings(/* thp = */ true, /* hugetlb = */ false);
+}
void thp_set_read_ahead_path(char *path);
unsigned long thp_supported_orders(void);
@@ -86,4 +90,66 @@ unsigned long thp_shmem_supported_orders(void);
bool thp_available(void);
bool thp_is_enabled(void);
-#endif /* __THP_SETTINGS_H__ */
+/* HugeTLB */
+
+int detect_hugetlb_page_sizes(unsigned long sizes[], int max);
+unsigned long default_huge_page_size(void);
+
+unsigned long hugetlb_nr_pages(unsigned long size);
+void hugetlb_set_nr_pages(unsigned long size, unsigned long nr);
+unsigned long hugetlb_free_pages(unsigned long size);
+
+static inline void hugetlb_save_settings(void)
+{
+ hugepage_save_settings(/* thp = */ false, /* hugetlb = */ true);
+}
+
+void hugetlb_restore_settings(void);
+
+static inline unsigned long hugetlb_nr_default_pages(void)
+{
+ unsigned long size = default_huge_page_size();
+
+ if (!size)
+ return 0;
+
+ return hugetlb_nr_pages(size);
+}
+
+static inline void hugetlb_set_nr_default_pages(unsigned long nr)
+{
+ unsigned long size = default_huge_page_size();
+
+ if (!size)
+ return;
+
+ hugetlb_set_nr_pages(size, nr);
+}
+
+static inline unsigned long hugetlb_free_default_pages(void)
+{
+ unsigned long size = default_huge_page_size();
+
+ if (!size)
+ return 0;
+
+ return hugetlb_free_pages(size);
+}
+
+static inline bool hugetlb_available(void)
+{
+ return default_huge_page_size() != 0;
+}
+
+bool hugetlb_setup_default(unsigned long nr);
+bool hugetlb_setup_default_exact(unsigned long nr);
+unsigned long hugetlb_setup(unsigned long nr, unsigned long sizes[],
+ int max);
+
+#define HUGETLB_SETUP_DEFAULT_PAGES(nr_pages) \
+static void __attribute__((constructor)) __hugetlb_setup_default(void) \
+{ \
+ hugetlb_setup_default((nr_pages)); \
+}
+
+#endif /* __HUGEPAGE_SETTINGS_H__ */
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index 5b12041fa310..555b4b3d1430 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -1,15 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * hugepage-madvise:
+ * hugetlb-madvise:
*
* Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
* on hugetlb mappings.
- *
- * Before running this test, make sure the administrator has pre-allocated
- * at least MIN_FREE_PAGES hugetlb pages and they are free. In addition,
- * the test takes an argument that is the path to a file in a hugetlbfs
- * filesystem. Therefore, a hugetlbfs filesystem must be mounted on some
- * directory.
*/
#define _GNU_SOURCE
@@ -20,18 +14,18 @@
#include <fcntl.h>
#include "vm_util.h"
#include "kselftest.h"
+#include "hugepage_settings.h"
#define MIN_FREE_PAGES 20
#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
#define validate_free_pages(exp_free) \
do { \
- int fhp = get_free_hugepages(); \
- if (fhp != (exp_free)) { \
- printf("Unexpected number of free huge " \
- "pages line %d\n", __LINE__); \
- exit(1); \
- } \
+ unsigned long fhp = hugetlb_free_default_pages(); \
+ if (fhp != (exp_free)) \
+ ksft_exit_fail_msg("Unexpected number of free " \
+ "huge pages %lu, expected %lu line %d\n", \
+ fhp, (exp_free), __LINE__); \
} while (0)
unsigned long huge_page_size;
@@ -57,28 +51,24 @@ int main(int argc, char **argv)
int fd;
int ret;
+ ksft_print_header();
+ ksft_set_plan(1);
+
huge_page_size = default_huge_page_size();
- if (!huge_page_size) {
- printf("Unable to determine huge page size, exiting!\n");
- exit(1);
- }
+ if (!huge_page_size)
+ ksft_exit_skip("Unable to determine huge page size\n");
+
base_page_size = sysconf(_SC_PAGE_SIZE);
- if (!huge_page_size) {
- printf("Unable to determine base page size, exiting!\n");
- exit(1);
- }
+ if (!base_page_size)
+ ksft_exit_fail_msg("Unable to determine base page size\n");
- free_hugepages = get_free_hugepages();
- if (free_hugepages < MIN_FREE_PAGES) {
- printf("Not enough free huge pages to test, exiting!\n");
- exit(KSFT_SKIP);
- }
+ if (!hugetlb_setup_default(MIN_FREE_PAGES))
+ ksft_exit_skip("Not enough free huge pages (have %lu, need %d)\n", hugetlb_free_default_pages(), MIN_FREE_PAGES);
+ free_hugepages = hugetlb_free_default_pages();
fd = memfd_create(argv[0], MFD_HUGETLB);
- if (fd < 0) {
- perror("memfd_create() failed");
- exit(1);
- }
+ if (fd < 0)
+ ksft_exit_fail_perror("memfd_create");
/*
* Test validity of MADV_DONTNEED addr and length arguments. mmap
@@ -90,16 +80,13 @@ int main(int argc, char **argv)
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
+
if (munmap(addr, huge_page_size) ||
munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
- huge_page_size)) {
- perror("munmap");
- exit(1);
- }
+ huge_page_size))
+ ksft_exit_fail_perror("munmap");
addr = addr + huge_page_size;
write_fault_pages(addr, NR_HUGE_PAGES);
@@ -108,20 +95,14 @@ int main(int argc, char **argv)
/* addr before mapping should fail */
ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
MADV_DONTNEED);
- if (!ret) {
- printf("Unexpected success of madvise call with invalid addr line %d\n",
- __LINE__);
- exit(1);
- }
+ if (!ret)
+ ksft_exit_fail_msg("madvise with invalid addr unexpectedly succeeded line %d\n", __LINE__);
/* addr + length after mapping should fail */
ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
MADV_DONTNEED);
- if (!ret) {
- printf("Unexpected success of madvise call with invalid length line %d\n",
- __LINE__);
- exit(1);
- }
+ if (!ret)
+ ksft_exit_fail_msg("madvise with invalid length unexpectedly succeeded line %d\n", __LINE__);
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
@@ -132,10 +113,9 @@ int main(int argc, char **argv)
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
+
write_fault_pages(addr, NR_HUGE_PAGES);
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
@@ -143,19 +123,14 @@ int main(int argc, char **argv)
ret = madvise(addr + base_page_size,
NR_HUGE_PAGES * huge_page_size - base_page_size,
MADV_DONTNEED);
- if (!ret) {
- printf("Unexpected success of madvise call with unaligned start address %d\n",
- __LINE__);
- exit(1);
- }
+ if (!ret)
+ ksft_exit_fail_msg("madvise with unaligned start unexpectedly succeeded line %d\n", __LINE__);
/* addr + length should be aligned down to huge page size */
if (madvise(addr,
((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
- MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
/* should free all but last page in mapping */
validate_free_pages(free_hugepages - 1);
@@ -170,17 +145,14 @@ int main(int argc, char **argv)
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
+
write_fault_pages(addr, NR_HUGE_PAGES);
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
/* should free all pages in mapping */
validate_free_pages(free_hugepages);
@@ -190,29 +162,25 @@ int main(int argc, char **argv)
/*
* Test MADV_DONTNEED on private mapping of hugetlb file
*/
- if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
- perror("fallocate");
- exit(1);
- }
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size))
+ ksft_exit_fail_perror("fallocate");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE, fd, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
/* read should not consume any pages */
read_fault_pages(addr, NR_HUGE_PAGES);
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
/* madvise should not free any pages */
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
/* writes should allocate private pages */
@@ -220,10 +188,9 @@ int main(int argc, char **argv)
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
/* madvise should free private pages */
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
/* writes should allocate private pages */
@@ -238,10 +205,9 @@ int main(int argc, char **argv)
* implementation.
*/
if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- 0, NR_HUGE_PAGES * huge_page_size)) {
- perror("fallocate");
- exit(1);
- }
+ 0, NR_HUGE_PAGES * huge_page_size))
+ ksft_exit_fail_perror("fallocate");
+
validate_free_pages(free_hugepages);
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
@@ -249,29 +215,25 @@ int main(int argc, char **argv)
/*
* Test MADV_DONTNEED on shared mapping of hugetlb file
*/
- if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
- perror("fallocate");
- exit(1);
- }
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size))
+ ksft_exit_fail_perror("fallocate");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
/* write should not consume any pages */
write_fault_pages(addr, NR_HUGE_PAGES);
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
/* madvise should not free any pages */
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
/*
@@ -279,29 +241,25 @@ int main(int argc, char **argv)
*
* madvise is same as hole punch and should free all pages.
*/
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages);
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
/*
* Test MADV_REMOVE on shared and private mapping of hugetlb file
*/
- if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
- perror("fallocate");
- exit(1);
- }
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size))
+ ksft_exit_fail_perror("fallocate");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
/* shared write should not consume any additional pages */
write_fault_pages(addr, NR_HUGE_PAGES);
@@ -310,10 +268,8 @@ int main(int argc, char **argv)
addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE, fd, 0);
- if (addr2 == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr2 == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
/* private read should not consume any pages */
read_fault_pages(addr2, NR_HUGE_PAGES);
@@ -324,17 +280,15 @@ int main(int argc, char **argv)
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
/* madvise of shared mapping should not free any pages */
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
/* madvise of private mapping should free private pages */
- if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
/* private write should consume additional pages again */
@@ -346,15 +300,16 @@ int main(int argc, char **argv)
* not correct. private pages should not be freed, but this is
* expected. See comment associated with FALLOC_FL_PUNCH_HOLE call.
*/
- if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
- perror("madvise");
- exit(1);
- }
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE))
+ ksft_exit_fail_perror("madvise");
+
validate_free_pages(free_hugepages);
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
close(fd);
- return 0;
+
+ ksft_test_result_pass("MADV_DONTNEED and MADV_REMOVE on hugetlb\n");
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/hugetlb-mmap.c b/tools/testing/selftests/mm/hugetlb-mmap.c
new file mode 100644
index 000000000000..0f2aad1b7dbd
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-mmap.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugetlb-mmap:
+ *
+ * Example of using huge page memory in a user application using the mmap
+ * system call. Before running this application, make sure that the
+ * administrator has mounted the hugetlbfs filesystem (on some directory
+ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
+ * example, the app is requesting memory of size 256MB that is backed by
+ * huge pages.
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include "vm_util.h"
+#include "kselftest.h"
+#include "hugepage_settings.h"
+
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+static void check_bytes(char *addr)
+{
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ for (i = 0; i < length; i++)
+ *(addr + i) = (char)i;
+}
+
+static bool verify_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < length; i++)
+ if (*(addr + i) != (char)i) {
+ ksft_print_msg("Error: Mismatch at %lu(%p)\n", i, addr);
+ return false;
+ }
+
+ return true;
+}
+
+static void test_mmap(size_t length, int mmap_flags, int fd,
+ const char *test_name)
+{
+ bool passed = true;
+ void *addr;
+
+ addr = mmap(NULL, length, PROTECTION, mmap_flags, fd, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
+
+ ksft_print_msg("Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr, length);
+ if (!verify_bytes(addr, length))
+ passed = false;
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, length))
+ ksft_exit_fail_perror("munmap");
+
+ ksft_test_result(passed, "%s\n", test_name);
+}
+
+static void test_anon_mmap(size_t length, int shift)
+{
+ const char *test_name = "hugetlb anonymous mmap";
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+
+ if (shift)
+ mmap_flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
+
+ test_mmap(length, mmap_flags, -1, test_name);
+}
+
+static void test_file_mmap(size_t length, int shift)
+{
+ const char *test_name = "hugetlb file mmap";
+ int mfd_flags = MFD_HUGETLB;
+ int fd;
+
+ if (shift)
+ mfd_flags |= (shift & MFD_HUGE_MASK) << MFD_HUGE_SHIFT;
+
+ fd = memfd_create("hugetlb-mmap", mfd_flags);
+ if (fd < 0)
+ ksft_exit_fail_perror("memfd_create");
+
+ test_mmap(length, MAP_SHARED, fd, test_name);
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ size_t hugepage_size;
+ size_t length = LENGTH;
+ int shift = 0, nr;
+
+ ksft_print_header();
+
+ if (argc > 1)
+ length = atol(argv[1]) << 20;
+ if (argc > 2)
+ shift = atoi(argv[2]);
+
+ hugetlb_save_settings();
+ if (shift) {
+ hugepage_size = (1UL << shift);
+ ksft_print_msg("%lu kB hugepages\n", 1UL << (shift - 10));
+ } else {
+ hugepage_size = default_huge_page_size();
+ if (!hugepage_size)
+ ksft_exit_skip("Could not detect default hugetlb page size.");
+ ksft_print_msg("Default size hugepages (%lu kB)\n", hugepage_size >> 10);
+ }
+
+ /* munmap will fail if the length is not page aligned */
+ length = (length + hugepage_size - 1) & ~(hugepage_size - 1);
+ nr = length / hugepage_size;
+
+ hugetlb_set_nr_pages(hugepage_size, nr);
+ if (hugetlb_free_pages(hugepage_size) < nr)
+ ksft_exit_skip("Not enough %lu Kb pages\n", hugepage_size >> 10);
+
+ ksft_set_plan(2);
+ ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
+
+ test_anon_mmap(length, shift);
+ test_file_mmap(length, shift);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugetlb-mremap.c
index b8f7d92e5a35..ed3d92e862d8 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugetlb-mremap.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * hugepage-mremap:
+ * hugetlb-mremap:
*
* Example of remapping huge page memory in a user application using the
* mremap system call. The path to a file in a hugetlbfs filesystem must
@@ -26,12 +26,13 @@
#include <stdbool.h>
#include "kselftest.h"
#include "vm_util.h"
+#include "hugepage_settings.h"
#define DEFAULT_LENGTH_MB 10UL
#define MB_TO_BYTES(x) (x * 1024 * 1024)
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
-#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
+#define FLAGS (MAP_HUGETLB | MAP_SHARED)
static void check_bytes(char *addr)
{
@@ -85,31 +86,21 @@ static void register_region_with_uffd(char *addr, size_t len)
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
ksft_exit_fail_msg("ioctl-UFFDIO_API: %s\n", strerror(errno));
- /* Create a private anonymous mapping. The memory will be
- * demand-zero paged--that is, not yet allocated. When we
- * actually touch the memory, it will be allocated via
- * the userfaultfd.
- */
-
- addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED)
- ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
-
- ksft_print_msg("Address returned by mmap() = %p\n", addr);
-
- /* Register the memory range of the mapping we just created for
- * handling by the userfaultfd object. In mode, we request to track
- * missing pages (i.e., pages that have not yet been faulted in).
+ /* Register the passed memory range for handling by the userfaultfd object.
+ * In mode, we request to track missing pages
+ * (i.e., pages that have not yet been faulted in).
*/
if (uffd_register(uffd, addr, len, true, false, false))
ksft_exit_fail_msg("ioctl-UFFDIO_REGISTER: %s\n", strerror(errno));
+
+ ksft_print_msg("Registered memory at address %p with userfaultfd\n", addr);
}
int main(int argc, char *argv[])
{
+ unsigned long hugepage_size;
+ int ret = 0, fd, nr;
size_t length = 0;
- int ret = 0, fd;
ksft_print_header();
ksft_set_plan(1);
@@ -125,30 +116,36 @@ int main(int argc, char *argv[])
else
length = DEFAULT_LENGTH_MB;
+ hugepage_size = default_huge_page_size();
+ if (!hugepage_size)
+ ksft_exit_skip("Could not detect default hugetlb page size\n");
length = MB_TO_BYTES(length);
+ length = (length + hugepage_size - 1) & ~(hugepage_size - 1);
+ nr = length / hugepage_size;
+
+ if (!hugetlb_setup_default(nr))
+ ksft_exit_skip("Not enough huge pages\n");
+
fd = memfd_create(argv[0], MFD_HUGETLB);
if (fd < 0)
ksft_exit_fail_msg("Open failed: %s\n", strerror(errno));
/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
unsigned long suggested_addr = 0x7eaa40000000;
- void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
- MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION, FLAGS, fd, 0);
ksft_print_msg("Map haddr: Returned address is %p\n", haddr);
if (haddr == MAP_FAILED)
ksft_exit_fail_msg("mmap1: %s\n", strerror(errno));
/* mmap again to a dummy address to hopefully trigger pmd sharing. */
suggested_addr = 0x7daa40000000;
- void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
- MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION, FLAGS, fd, 0);
ksft_print_msg("Map daddr: Returned address is %p\n", daddr);
if (daddr == MAP_FAILED)
ksft_exit_fail_msg("mmap3: %s\n", strerror(errno));
suggested_addr = 0x7faa40000000;
- void *vaddr =
- mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
+ void *vaddr = mmap((void *)suggested_addr, length, PROTECTION, FLAGS, fd, 0);
ksft_print_msg("Map vaddr: Returned address is %p\n", vaddr);
if (vaddr == MAP_FAILED)
ksft_exit_fail_msg("mmap2: %s\n", strerror(errno));
diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
index 46230462ad48..70b24e3660c4 100644
--- a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
+++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
@@ -10,12 +10,10 @@
#include <sys/statfs.h>
#include <errno.h>
#include <stdbool.h>
+#include <signal.h>
#include "kselftest.h"
-#define PREFIX " ... "
-#define ERROR_PREFIX " !!! "
-
#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16)
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
@@ -25,17 +23,22 @@ enum test_status {
TEST_SKIPPED = 2,
};
-static char *status_to_str(enum test_status status)
+static void report_status(enum test_status status, const char *test_name,
+ size_t chunk_size)
{
switch (status) {
case TEST_PASSED:
- return "TEST_PASSED";
+ ksft_test_result_pass("%s chunk_size=0x%lx\n",
+ test_name, chunk_size);
+ break;
case TEST_FAILED:
- return "TEST_FAILED";
+ ksft_test_result_fail("%s chunk_size=0x%lx\n",
+ test_name, chunk_size);
+ break;
case TEST_SKIPPED:
- return "TEST_SKIPPED";
- default:
- return "TEST_???";
+ ksft_test_result_skip("%s chunk_size=0x%lx\n",
+ test_name, chunk_size);
+ break;
}
}
@@ -58,8 +61,8 @@ static bool verify_chunk(char *buf, size_t len, char val)
for (i = 0; i < len; ++i) {
if (buf[i] != val) {
- printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n",
- i, buf[i], val);
+ ksft_print_msg("check fail: buf[%lu] = %u != %u\n",
+ i, buf[i], val);
return false;
}
}
@@ -75,21 +78,21 @@ static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size,
ssize_t total_ret_count = 0;
char val = offset / wr_chunk_size + offset % wr_chunk_size;
- printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset);
- printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
- expected);
+ ksft_print_msg("init val=%u with offset=0x%lx\n", val, offset);
+ ksft_print_msg("expect to read 0x%lx bytes of data in total\n",
+ expected);
if (lseek(fd, offset, SEEK_SET) < 0) {
- perror(PREFIX ERROR_PREFIX "seek failed");
+ ksft_perror("seek failed");
return false;
}
while (offset + total_ret_count < len) {
ret_count = read(fd, buf, wr_chunk_size);
if (ret_count == 0) {
- printf(PREFIX PREFIX "read reach end of the file\n");
+ ksft_print_msg("read reach end of the file\n");
break;
} else if (ret_count < 0) {
- perror(PREFIX ERROR_PREFIX "read failed");
+ ksft_perror("read failed");
break;
}
++val;
@@ -98,8 +101,8 @@ static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size,
total_ret_count += ret_count;
}
- printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
- total_ret_count);
+ ksft_print_msg("actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
return total_ret_count == expected;
}
@@ -112,15 +115,15 @@ static bool read_hugepage_filemap(int fd, size_t len,
ssize_t total_ret_count = 0;
char val = 0;
- printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
- expected);
+ ksft_print_msg("expect to read 0x%lx bytes of data in total\n",
+ expected);
while (total_ret_count < len) {
ret_count = read(fd, buf, wr_chunk_size);
if (ret_count == 0) {
- printf(PREFIX PREFIX "read reach end of the file\n");
+ ksft_print_msg("read reach end of the file\n");
break;
} else if (ret_count < 0) {
- perror(PREFIX ERROR_PREFIX "read failed");
+ ksft_perror("read failed");
break;
}
++val;
@@ -129,8 +132,8 @@ static bool read_hugepage_filemap(int fd, size_t len,
total_ret_count += ret_count;
}
- printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
- total_ret_count);
+ ksft_print_msg("actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
return total_ret_count == expected;
}
@@ -142,14 +145,14 @@ test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size)
char *filemap = NULL;
if (ftruncate(fd, len) < 0) {
- perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ ksft_perror("ftruncate failed");
return status;
}
filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
if (filemap == MAP_FAILED) {
- perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ ksft_perror("mmap for primary mapping failed");
goto done;
}
@@ -162,7 +165,7 @@ test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size)
munmap(filemap, len);
done:
if (ftruncate(fd, 0) < 0) {
- perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ ksft_perror("ftruncate back to 0 failed");
status = TEST_FAILED;
}
@@ -179,14 +182,14 @@ test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size,
const unsigned long pagesize = getpagesize();
if (ftruncate(fd, len) < 0) {
- perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ ksft_perror("ftruncate failed");
return status;
}
filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
if (filemap == MAP_FAILED) {
- perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ ksft_perror("mmap for primary mapping failed");
goto done;
}
@@ -201,7 +204,7 @@ test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size,
*/
hwp_addr = filemap + len / 2 + pagesize;
if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) {
- perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed");
+ ksft_perror("MADV_HWPOISON failed");
goto unmap;
}
@@ -228,7 +231,7 @@ unmap:
munmap(filemap, len);
done:
if (ftruncate(fd, 0) < 0) {
- perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ ksft_perror("ftruncate back to 0 failed");
status = TEST_FAILED;
}
@@ -241,17 +244,17 @@ static int create_hugetlbfs_file(struct statfs *file_stat)
fd = memfd_create("hugetlb_tmp", MFD_HUGETLB);
if (fd < 0) {
- perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file");
+ ksft_perror("could not open hugetlbfs file");
return -1;
}
memset(file_stat, 0, sizeof(*file_stat));
if (fstatfs(fd, file_stat)) {
- perror(PREFIX ERROR_PREFIX "fstatfs failed");
+ ksft_perror("fstatfs failed");
goto close;
}
if (file_stat->f_type != HUGETLBFS_MAGIC) {
- printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n");
+ ksft_print_msg("not hugetlbfs file\n");
goto close;
}
@@ -261,6 +264,10 @@ close:
return -1;
}
+static void sigbus_handler(int sig)
+{
+}
+
int main(void)
{
int fd;
@@ -273,50 +280,44 @@ int main(void)
};
size_t i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(wr_chunk_sizes) * 3);
+
+ signal(SIGBUS, sigbus_handler);
for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) {
- printf("Write/read chunk size=0x%lx\n",
- wr_chunk_sizes[i]);
+ ksft_print_msg("Write/read chunk size=0x%lx\n",
+ wr_chunk_sizes[i]);
fd = create_hugetlbfs_file(&file_stat);
if (fd < 0)
- goto create_failure;
- printf(PREFIX "HugeTLB read regression test...\n");
+ ksft_exit_fail_msg("Failed to create hugetlbfs file\n");
+
status = test_hugetlb_read(fd, file_stat.f_bsize,
wr_chunk_sizes[i]);
- printf(PREFIX "HugeTLB read regression test...%s\n",
- status_to_str(status));
close(fd);
- if (status == TEST_FAILED)
- return -1;
+ report_status(status, "HugeTLB read regression",
+ wr_chunk_sizes[i]);
fd = create_hugetlbfs_file(&file_stat);
if (fd < 0)
- goto create_failure;
- printf(PREFIX "HugeTLB read HWPOISON test...\n");
+ ksft_exit_fail_msg("Failed to create hugetlbfs file\n");
+
status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
wr_chunk_sizes[i], false);
- printf(PREFIX "HugeTLB read HWPOISON test...%s\n",
- status_to_str(status));
close(fd);
- if (status == TEST_FAILED)
- return -1;
+ report_status(status, "HugeTLB read HWPOISON",
+ wr_chunk_sizes[i]);
fd = create_hugetlbfs_file(&file_stat);
if (fd < 0)
- goto create_failure;
- printf(PREFIX "HugeTLB seek then read HWPOISON test...\n");
+ ksft_exit_fail_msg("Failed to create hugetlbfs file\n");
+
status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
wr_chunk_sizes[i], true);
- printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n",
- status_to_str(status));
close(fd);
- if (status == TEST_FAILED)
- return -1;
+ report_status(status, "HugeTLB seek then read HWPOISON",
+ wr_chunk_sizes[i]);
}
- return 0;
-
-create_failure:
- printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n");
- return -1;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/hugepage-shm.c b/tools/testing/selftests/mm/hugetlb-shm.c
index ef06260802b5..3ff7f062b7eb 100644
--- a/tools/testing/selftests/mm/hugepage-shm.c
+++ b/tools/testing/selftests/mm/hugetlb-shm.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * hugepage-shm:
+ * hugetlb-shm:
*
* Example of using huge page memory in a user application using Sys V shared
* memory system calls. In this example the app is requesting 256MB of
@@ -28,9 +28,27 @@
#include <sys/shm.h>
#include <sys/mman.h>
+#include "vm_util.h"
+#include "hugepage_settings.h"
+
#define LENGTH (256UL*1024*1024)
-#define dprintf(x) printf(x)
+static void prepare(void)
+{
+ unsigned long length, hugepage_size, nr;
+
+ hugepage_size = default_huge_page_size();
+ if (!hugepage_size)
+ ksft_exit_skip("Unable to determine huge page size\n");
+
+ length = (LENGTH + hugepage_size - 1) & ~(hugepage_size - 1);
+ nr = length / hugepage_size;
+
+ if (!hugetlb_setup_default(nr))
+ ksft_exit_skip("Not enough free huge pages\n");
+
+ shm_limits_prepare(length);
+}
int main(void)
{
@@ -38,44 +56,45 @@ int main(void)
unsigned long i;
char *shmaddr;
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ prepare();
+
shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
- if (shmid < 0) {
- perror("shmget");
- exit(1);
- }
- printf("shmid: 0x%x\n", shmid);
+ if (shmid < 0)
+ ksft_exit_fail_perror("shmget");
+
+ ksft_print_msg("shmid: 0x%x\n", shmid);
shmaddr = shmat(shmid, NULL, 0);
if (shmaddr == (char *)-1) {
- perror("Shared memory attach failure");
+ ksft_perror("Shared memory attach failure");
shmctl(shmid, IPC_RMID, NULL);
- exit(2);
+ ksft_exit_fail();
}
- printf("shmaddr: %p\n", shmaddr);
+ ksft_print_msg("shmaddr: %p\n", shmaddr);
- dprintf("Starting the writes:\n");
- for (i = 0; i < LENGTH; i++) {
+ ksft_print_msg("Starting the writes:\n");
+ for (i = 0; i < LENGTH; i++)
shmaddr[i] = (char)(i);
- if (!(i % (1024 * 1024)))
- dprintf(".");
- }
- dprintf("\n");
- dprintf("Starting the Check...");
+ ksft_print_msg("Starting the Check...");
for (i = 0; i < LENGTH; i++)
- if (shmaddr[i] != (char)i) {
- printf("\nIndex %lu mismatched\n", i);
- exit(3);
- }
- dprintf("Done.\n");
+ if (shmaddr[i] != (char)i)
+ ksft_exit_fail_msg("Data mismatch at index %lu\n", i);
+ ksft_print_msg("Done.\n");
if (shmdt((const void *)shmaddr) != 0) {
- perror("Detach failure");
+ ksft_perror("Detach failure");
shmctl(shmid, IPC_RMID, NULL);
- exit(4);
+ ksft_exit_fail();
}
shmctl(shmid, IPC_RMID, NULL);
- return 0;
+ ksft_test_result_pass("hugepage using SysV shmget/shmat\n");
+ ksft_finished();
}
+
+SHM_LIMITS_RESTORE()
diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c
index a8bc02688085..bc202e4ed2bd 100644
--- a/tools/testing/selftests/mm/hugetlb-soft-offline.c
+++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c
@@ -6,9 +6,7 @@
* - if enable_soft_offline = 1, a hugepage should be dissolved and
* nr_hugepages/free_hugepages should be reduced by 1.
*
- * Before running, make sure more than 2 hugepages of default_hugepagesz
- * are allocated. For example, if /proc/meminfo/Hugepagesize is 2048kB:
- * echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ * The test allocates 8 default hugepages
*/
#define _GNU_SOURCE
@@ -25,6 +23,7 @@
#include <sys/types.h>
#include "kselftest.h"
+#include "hugepage_settings.h"
#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101
@@ -100,32 +99,6 @@ static int set_enable_soft_offline(int value)
return 0;
}
-static int read_nr_hugepages(unsigned long hugepage_size,
- unsigned long *nr_hugepages)
-{
- char buffer[256] = {0};
- char cmd[256] = {0};
-
- sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages",
- hugepage_size);
- FILE *cmdfile = popen(cmd, "r");
-
- if (cmdfile == NULL) {
- ksft_perror(EPREFIX "failed to popen nr_hugepages");
- return -1;
- }
-
- if (!fgets(buffer, sizeof(buffer), cmdfile)) {
- ksft_perror(EPREFIX "failed to read nr_hugepages");
- pclose(cmdfile);
- return -1;
- }
-
- *nr_hugepages = atoll(buffer);
- pclose(cmdfile);
- return 0;
-}
-
static int create_hugetlbfs_file(struct statfs *file_stat)
{
int fd;
@@ -177,20 +150,14 @@ static void test_soft_offline_common(int enable_soft_offline)
ksft_exit_fail_msg("Failed to set enable_soft_offline\n");
}
- if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) {
- close(fd);
- ksft_exit_fail_msg("Failed to read nr_hugepages\n");
- }
+ nr_hugepages_before = hugetlb_nr_default_pages();
ksft_print_msg("Before MADV_SOFT_OFFLINE nr_hugepages=%ld\n",
nr_hugepages_before);
ret = do_soft_offline(fd, 2 * file_stat.f_bsize, expect_errno);
- if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) {
- close(fd);
- ksft_exit_fail_msg("Failed to read nr_hugepages\n");
- }
+ nr_hugepages_after = hugetlb_nr_default_pages();
ksft_print_msg("After MADV_SOFT_OFFLINE nr_hugepages=%ld\n",
nr_hugepages_after);
@@ -219,6 +186,10 @@ static void test_soft_offline_common(int enable_soft_offline)
int main(int argc, char **argv)
{
ksft_print_header();
+
+ if (!hugetlb_setup_default(8))
+ ksft_exit_skip("not enough hugetlb pages\n");
+
ksft_set_plan(2);
test_soft_offline_common(1);
diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugetlb-vmemmap.c
index df366a4d1b92..507df78a158d 100644
--- a/tools/testing/selftests/mm/hugepage-vmemmap.c
+++ b/tools/testing/selftests/mm/hugetlb-vmemmap.c
@@ -11,6 +11,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include "vm_util.h"
+#include "hugepage_settings.h"
#define PAGE_COMPOUND_HEAD (1UL << 15)
#define PAGE_COMPOUND_TAIL (1UL << 16)
@@ -63,7 +64,7 @@ static int check_page_flags(unsigned long pfn)
read(fd, &pageflags, sizeof(pageflags));
if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
close(fd);
- printf("Head page flags (%lx) is invalid\n", pageflags);
+ ksft_print_msg("Head page flags (%lx) is invalid\n", pageflags);
return -1;
}
@@ -77,7 +78,7 @@ static int check_page_flags(unsigned long pfn)
if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
(pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
close(fd);
- printf("Tail page flags (%lx) is invalid\n", pageflags);
+ ksft_print_msg("Tail page flags (%lx) is invalid\n", pageflags);
return -1;
}
}
@@ -91,44 +92,41 @@ int main(int argc, char **argv)
{
void *addr;
unsigned long pfn;
+ int ret;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (!hugetlb_setup_default(1))
+ ksft_exit_skip("Not enough free huge pages\n");
pagesize = psize();
maplength = default_huge_page_size();
- if (!maplength) {
- printf("Unable to determine huge page size\n");
- exit(1);
- }
+ if (!maplength)
+ ksft_exit_skip("Unable to determine huge page size\n");
addr = mmap(NULL, maplength, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_perror("mmap");
/* Trigger allocation of HugeTLB page. */
write_bytes(addr, maplength);
pfn = virt_to_pfn(addr);
if (pfn == -1UL) {
+ ksft_perror("virt_to_pfn");
munmap(addr, maplength);
- perror("virt_to_pfn");
- exit(1);
+ ksft_exit_fail();
}
- printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+ ksft_print_msg("Returned address is %p whose pfn is %lx\n", addr, pfn);
- if (check_page_flags(pfn) < 0) {
- munmap(addr, maplength);
- perror("check_page_flags");
- exit(1);
- }
+ ret = check_page_flags(pfn);
- /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
- if (munmap(addr, maplength)) {
- perror("munmap");
- exit(1);
- }
+ if (munmap(addr, maplength))
+ ksft_exit_fail_perror("munmap");
- return 0;
+ ksft_test_result(!ret, "HugeTLB vmemmap page flags\n");
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
index 31a054fa8134..fb4600570e13 100644
--- a/tools/testing/selftests/mm/hugetlb_dio.c
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -20,6 +20,7 @@
#include <sys/syscall.h>
#include "vm_util.h"
#include "kselftest.h"
+#include "hugepage_settings.h"
#ifndef STATX_DIOALIGN
#define STATX_DIOALIGN 0x00002000U
@@ -84,19 +85,13 @@ static void run_dio_using_hugetlb(int fd, unsigned int start_off,
/* Get the default huge page size */
h_pagesize = default_huge_page_size();
- if (!h_pagesize)
- ksft_exit_fail_msg("Unable to determine huge page size\n");
/* Reset file position since fd is shared across tests */
if (lseek(fd, 0, SEEK_SET) < 0)
ksft_exit_fail_perror("lseek failed\n");
/* Get the free huge pages before allocation */
- free_hpage_b = get_free_hugepages();
- if (free_hpage_b == 0) {
- close(fd);
- ksft_exit_skip("No free hugepage, exiting!\n");
- }
+ free_hpage_b = hugetlb_free_default_pages();
/* Allocate a hugetlb page */
orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0);
@@ -120,7 +115,7 @@ static void run_dio_using_hugetlb(int fd, unsigned int start_off,
munmap(orig_buffer, h_pagesize);
/* Get the free huge pages after unmap*/
- free_hpage_a = get_free_hugepages();
+ free_hpage_a = hugetlb_free_default_pages();
ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
@@ -140,8 +135,8 @@ int main(void)
ksft_print_header();
- /* Check if huge pages are free */
- if (!get_free_hugepages())
+ /* request a huge page */
+ if (!hugetlb_setup_default(1))
ksft_exit_skip("No free hugepage, exiting\n");
fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664);
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
index b4b257775b74..2dc158054f66 100644
--- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -10,6 +10,7 @@
#include "vm_util.h"
#include "kselftest.h"
+#include "hugepage_settings.h"
#define INLOOP_ITER 100
@@ -53,7 +54,6 @@ void *madv(void *unused)
int main(void)
{
- unsigned long free_hugepages;
pthread_t thread1, thread2;
/*
* On kernel 6.4, we are able to reproduce the problem with ~1000
@@ -77,11 +77,8 @@ int main(void)
ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n",
huge_page_size / 1024);
- free_hugepages = get_free_hugepages();
- if (free_hugepages != 1) {
- ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
- free_hugepages);
- }
+ if (!hugetlb_setup_default(1))
+ ksft_exit_skip("Not enough HugeTLB pages\n");
while (max--) {
huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE,
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
index efd774b41389..f94549efcc6f 100644
--- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -25,7 +25,7 @@
#include <unistd.h>
#include "vm_util.h"
-#include "kselftest.h"
+#include "hugepage_settings.h"
#define INLOOP_ITER 100
@@ -77,7 +77,6 @@ void *map_extra(void *unused)
int main(void)
{
pthread_t thread1, thread2, thread3;
- unsigned long free_hugepages;
void *ret;
/*
@@ -86,12 +85,12 @@ int main(void)
*/
int max = 10;
- free_hugepages = get_free_hugepages();
+ ksft_print_header();
+ ksft_set_plan(1);
- if (free_hugepages != 1) {
+ if (!hugetlb_setup_default_exact(1))
ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
- free_hugepages);
- }
+ hugetlb_free_default_pages());
mmap_size = default_huge_page_size();
@@ -100,10 +99,8 @@ int main(void)
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
- if ((unsigned long)huge_ptr == -1) {
- ksft_test_result_fail("Failed to allocate huge page\n");
- return KSFT_FAIL;
- }
+ if ((unsigned long)huge_ptr == -1)
+ ksft_exit_fail_msg("Failed to allocate huge page\n");
pthread_create(&thread1, NULL, madv, NULL);
pthread_create(&thread2, NULL, touch, NULL);
@@ -115,12 +112,13 @@ int main(void)
if (ret) {
ksft_test_result_fail("Unexpected huge page allocation\n");
- return KSFT_FAIL;
+ ksft_finished();
}
/* Unmap and restart */
munmap(huge_ptr, mmap_size);
}
- return KSFT_PASS;
+ ksft_test_result_pass("No unexpected huge page allocations\n");
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 0dd31892ff67..95f517c3bd16 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -12,6 +12,8 @@ if [[ $(id -u) -ne 0 ]]; then
fi
nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+trap 'echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages' EXIT INT TERM
+
usage_file=usage_in_bytes
if [[ "$1" == "-cgroup-v2" ]]; then
@@ -46,6 +48,13 @@ function get_machine_hugepage_size() {
}
MB=$(get_machine_hugepage_size)
+if (( MB >= 1024 )); then
+ UNIT="GB"
+ MB_DISPLAY=$((MB / 1024))
+else
+ UNIT="MB"
+ MB_DISPLAY=$MB
+fi
function cleanup() {
echo cleanup
@@ -56,7 +65,6 @@ function cleanup() {
rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
rmdir "$CGROUP_ROOT"/a 2>/dev/null
rmdir "$CGROUP_ROOT"/test1 2>/dev/null
- echo $nr_hugepgs >/proc/sys/vm/nr_hugepages
set -e
}
@@ -87,6 +95,7 @@ function assert_with_retry() {
if [[ $elapsed -ge $timeout ]]; then
echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+ echo FAIL
cleanup
exit 1
fi
@@ -96,22 +105,19 @@ function assert_with_retry() {
}
function assert_state() {
- local expected_a="$1"
- local expected_a_hugetlb="$2"
- local expected_b=""
+ local expected_a_hugetlb="$1"
local expected_b_hugetlb=""
- if [ ! -z ${3:-} ] && [ ! -z ${4:-} ]; then
- expected_b="$3"
- expected_b_hugetlb="$4"
+ if [ ! -z ${2:-} ]; then
+ expected_b_hugetlb="$2"
fi
- assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
- assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
+ assert_with_retry \
+ "$CGROUP_ROOT/a/hugetlb.${MB_DISPLAY}${UNIT}.$usage_file" "$expected_a_hugetlb"
- if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
- assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
- assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
+ if [[ -n "$expected_b_hugetlb" ]]; then
+ assert_with_retry \
+ "$CGROUP_ROOT/a/b/hugetlb.${MB_DISPLAY}${UNIT}.$usage_file" "$expected_b_hugetlb"
fi
}
@@ -143,18 +149,17 @@ write_hugetlbfs() {
local size="$3"
if [[ $cgroup2 ]]; then
- echo $$ >$CGROUP_ROOT/$cgroup/cgroup.procs
+ cg_file="$CGROUP_ROOT/$cgroup/cgroup.procs"
else
echo 0 >$CGROUP_ROOT/$cgroup/cpuset.mems
echo 0 >$CGROUP_ROOT/$cgroup/cpuset.cpus
- echo $$ >"$CGROUP_ROOT/$cgroup/tasks"
- fi
- ./write_to_hugetlbfs -p "$path" -s "$size" -m 0 -o
- if [[ $cgroup2 ]]; then
- echo $$ >$CGROUP_ROOT/cgroup.procs
- else
- echo $$ >"$CGROUP_ROOT/tasks"
+ cg_file="$CGROUP_ROOT/$cgroup/tasks"
fi
+
+ # Spawn helper to join cgroup before exec to ensure correct cgroup accounting
+ bash -c 'echo $$ > "$1"; exec ./write_to_hugetlbfs -p "$2" -s "$3" -m 0 -o' _ \
+ "$cg_file" "$path" "$size" & pid=$!
+ wait "$pid"
echo
}
@@ -192,21 +197,21 @@ if [[ ! $cgroup2 ]]; then
write_hugetlbfs a "$MNT"/test $size
echo Assert memory charged correctly for parent use.
- assert_state 0 $size 0 0
+ assert_state $size 0
write_hugetlbfs a/b "$MNT"/test2 $size
echo Assert memory charged correctly for child use.
- assert_state 0 $(($size * 2)) 0 $size
+ assert_state $(($size * 2)) $size
rmdir "$CGROUP_ROOT"/a/b
echo Assert memory reparent correctly.
- assert_state 0 $(($size * 2))
+ assert_state $(($size * 2))
rm -rf "$MNT"/*
umount "$MNT"
echo Assert memory uncharged correctly.
- assert_state 0 0
+ assert_state 0
cleanup
fi
@@ -220,16 +225,16 @@ echo write
write_hugetlbfs a/b "$MNT"/test2 $size
echo Assert memory charged correctly for child only use.
-assert_state 0 $(($size)) 0 $size
+assert_state $(($size)) $size
rmdir "$CGROUP_ROOT"/a/b
echo Assert memory reparent correctly.
-assert_state 0 $size
+assert_state $size
rm -rf "$MNT"/*
umount "$MNT"
echo Assert memory uncharged correctly.
-assert_state 0 0
+assert_state 0
cleanup
@@ -240,4 +245,3 @@ if [[ $do_umount ]]; then
rm -rf $CGROUP_ROOT
fi
-echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c
index 3fe7ef04ac62..10e8dedcb087 100644
--- a/tools/testing/selftests/mm/khugepaged.c
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -22,7 +22,7 @@
#include "linux/magic.h"
#include "vm_util.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
@@ -41,6 +41,12 @@ enum vma_type {
VMA_SHMEM,
};
+enum file_setup_ops {
+ FILE_SETUP_READ_ONLY_FS,
+ FILE_SETUP_READ_WRITE_FS_READ_DATA,
+ FILE_SETUP_READ_WRITE_FS_WRITE_DATA,
+};
+
struct mem_ops {
void *(*setup_area)(int nr_hpages);
void (*cleanup_area)(void *p, unsigned long size);
@@ -49,7 +55,9 @@ struct mem_ops {
const char *name;
};
-static struct mem_ops *file_ops;
+static struct mem_ops *read_only_file_ops;
+static struct mem_ops *read_write_file_read_ops;
+static struct mem_ops *read_write_file_write_ops;
static struct mem_ops *anon_ops;
static struct mem_ops *shmem_ops;
@@ -72,57 +80,36 @@ struct file_info {
};
static struct file_info finfo;
-static bool skip_settings_restore;
static int exit_status;
static void success(const char *msg)
{
printf(" \e[32m%s\e[0m\n", msg);
+ exit_status = KSFT_PASS;
}
static void fail(const char *msg)
{
printf(" \e[31m%s\e[0m\n", msg);
- exit_status++;
+ exit_status = KSFT_FAIL;
}
static void skip(const char *msg)
{
printf(" \e[33m%s\e[0m\n", msg);
-}
-
-static void restore_settings_atexit(void)
-{
- if (skip_settings_restore)
- return;
-
- printf("Restore THP and khugepaged settings...");
- thp_restore_settings();
- success("OK");
-
- skip_settings_restore = true;
-}
-
-static void restore_settings(int sig)
-{
- /* exit() will invoke the restore_settings_atexit handler. */
- exit(sig ? EXIT_FAILURE : exit_status);
+ exit_status = KSFT_SKIP;
}
static void save_settings(void)
{
- printf("Save THP and khugepaged settings...");
- if (file_ops && finfo.type == VMA_FILE)
+ ksft_print_msg("Save THP and khugepaged settings...");
+ if ((read_only_file_ops || read_write_file_read_ops ||
+ read_write_file_write_ops) &&
+ finfo.type == VMA_FILE)
thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path);
thp_save_settings();
success("OK");
-
- atexit(restore_settings_atexit);
- signal(SIGTERM, restore_settings);
- signal(SIGINT, restore_settings);
- signal(SIGHUP, restore_settings);
- signal(SIGQUIT, restore_settings);
}
static void get_finfo(const char *dir)
@@ -135,19 +122,13 @@ static void get_finfo(const char *dir)
finfo.dir = dir;
stat(finfo.dir, &path_stat);
- if (!S_ISDIR(path_stat.st_mode)) {
- printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
- exit(EXIT_FAILURE);
- }
+ if (!S_ISDIR(path_stat.st_mode))
+ ksft_exit_fail_msg("%s: Not a directory (%s)\n", __func__, finfo.dir);
if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
- finfo.dir) >= sizeof(finfo.path)) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- if (statfs(finfo.dir, &fs)) {
- perror("statfs()");
- exit(EXIT_FAILURE);
- }
+ finfo.dir) >= sizeof(finfo.path))
+ ksft_exit_fail_msg("%s: Pathname is too long\n", __func__);
+ if (statfs(finfo.dir, &fs))
+ ksft_exit_fail_perror("statfs()");
finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
if (finfo.type == VMA_SHMEM)
return;
@@ -155,40 +136,30 @@ static void get_finfo(const char *dir)
/* Find owning device's queue/read_ahead_kb control */
if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
major(path_stat.st_dev), minor(path_stat.st_dev))
- >= sizeof(path)) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- if (read_file(path, buf, sizeof(buf)) < 0) {
- perror("read_file(read_num)");
- exit(EXIT_FAILURE);
- }
+ >= sizeof(path))
+ ksft_exit_fail_msg("%s: Pathname is too long\n", __func__);
+ if (read_file(path, buf, sizeof(buf)) < 0)
+ ksft_exit_fail_perror("read_file(read_num)");
if (strstr(buf, "DEVTYPE=disk")) {
/* Found it */
if (snprintf(finfo.dev_queue_read_ahead_path,
sizeof(finfo.dev_queue_read_ahead_path),
"/sys/dev/block/%d:%d/queue/read_ahead_kb",
major(path_stat.st_dev), minor(path_stat.st_dev))
- >= sizeof(finfo.dev_queue_read_ahead_path)) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
+ >= sizeof(finfo.dev_queue_read_ahead_path))
+ ksft_exit_fail_msg("%s: Pathname is too long\n", __func__);
return;
}
- if (!strstr(buf, "DEVTYPE=partition")) {
- printf("%s: Unknown device type: %s\n", __func__, path);
- exit(EXIT_FAILURE);
- }
+ if (!strstr(buf, "DEVTYPE=partition"))
+ ksft_exit_fail_msg("%s: Unknown device type: %s\n", __func__, path);
/*
* Partition of block device - need to find actual device.
* Using naming convention that devnameN is partition of
* device devname.
*/
str = strstr(buf, "DEVNAME=");
- if (!str) {
- printf("%s: Could not read: %s", __func__, path);
- exit(EXIT_FAILURE);
- }
+ if (!str)
+ ksft_exit_fail_msg("%s: Could not read: %s", __func__, path);
str += 8;
end = str;
while (*end) {
@@ -197,16 +168,13 @@ static void get_finfo(const char *dir)
if (snprintf(finfo.dev_queue_read_ahead_path,
sizeof(finfo.dev_queue_read_ahead_path),
"/sys/block/%s/queue/read_ahead_kb",
- str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
+ str) >= sizeof(finfo.dev_queue_read_ahead_path))
+ ksft_exit_fail_msg("%s: Pathname is too long\n", __func__);
return;
}
++end;
}
- printf("%s: Could not read: %s\n", __func__, path);
- exit(EXIT_FAILURE);
+ ksft_exit_fail_msg("%s: Could not read: %s\n", __func__, path);
}
static bool check_swap(void *addr, unsigned long size)
@@ -219,26 +187,19 @@ static bool check_swap(void *addr, unsigned long size)
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
(unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
+ if (ret >= MAX_LINE_LENGTH)
+ ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
fp = fopen(PID_SMAPS, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
- exit(EXIT_FAILURE);
- }
+ if (!fp)
+ ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, PID_SMAPS);
if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
goto err_out;
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
size >> 10);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
+ if (ret >= MAX_LINE_LENGTH)
+ ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
/*
* Fetch the Swap: in the same block and check whether it got
* the expected number of hugeepages next.
@@ -261,10 +222,8 @@ static void *alloc_mapping(int nr)
p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (p != BASE_ADDR) {
- printf("Failed to allocate VMA at %p\n", BASE_ADDR);
- exit(EXIT_FAILURE);
- }
+ if (p != BASE_ADDR)
+ ksft_exit_fail_msg("Failed to allocate VMA at %p\n", BASE_ADDR);
return p;
}
@@ -314,19 +273,13 @@ static void *alloc_hpage(struct mem_ops *ops)
* khugepaged on low-load system (like a test machine), which
* would cause MADV_COLLAPSE to fail with EAGAIN.
*/
- printf("Allocate huge page...");
- if (madvise_collapse_retry(p, hpage_pmd_size)) {
- perror("madvise(MADV_COLLAPSE)");
- exit(EXIT_FAILURE);
- }
- if (!ops->check_huge(p, 1)) {
- perror("madvise(MADV_COLLAPSE)");
- exit(EXIT_FAILURE);
- }
- if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
- perror("madvise(MADV_HUGEPAGE)");
- exit(EXIT_FAILURE);
- }
+ ksft_print_msg("Allocate huge page...");
+ if (madvise_collapse_retry(p, hpage_pmd_size))
+ ksft_exit_fail_perror("madvise(MADV_COLLAPSE)");
+ if (!ops->check_huge(p, 1))
+ ksft_exit_fail_perror("madvise(MADV_COLLAPSE)");
+ if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE))
+ ksft_exit_fail_perror("madvise(MADV_HUGEPAGE)");
success("OK");
return p;
}
@@ -336,11 +289,9 @@ static void validate_memory(int *p, unsigned long start, unsigned long end)
int i;
for (i = start / page_size; i < end / page_size; i++) {
- if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
- printf("Page %d is corrupted: %#x\n",
- i, p[i * page_size / sizeof(*p)]);
- exit(EXIT_FAILURE);
- }
+ if (p[i * page_size / sizeof(*p)] != i + 0xdead0000)
+ ksft_exit_fail_msg("Page %d is corrupted: %#x\n",
+ i, p[i * page_size / sizeof(*p)]);
}
}
@@ -364,42 +315,52 @@ static bool anon_check_huge(void *addr, int nr_hpages)
return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
}
-static void *file_setup_area(int nr_hpages)
+static void *file_setup_area_common(int nr_hpages, enum file_setup_ops setup)
{
+ const int open_opt = setup == FILE_SETUP_READ_ONLY_FS ? O_RDONLY : O_RDWR;
+ const int mmap_prot = setup == FILE_SETUP_READ_ONLY_FS ? PROT_READ : (PROT_READ | PROT_WRITE);
int fd;
void *p;
unsigned long size;
unlink(finfo.path); /* Cleanup from previous failed tests */
- printf("Creating %s for collapse%s...", finfo.path,
- finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
- fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
+ ksft_print_msg("Creating %s for collapse%s...", finfo.path,
+ finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
+ fd = open(finfo.path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
777);
- if (fd < 0) {
- perror("open()");
- exit(EXIT_FAILURE);
- }
+ if (fd < 0)
+ ksft_exit_fail_perror("open()");
size = nr_hpages * hpage_pmd_size;
- p = alloc_mapping(nr_hpages);
- fill_memory(p, 0, size);
- write(fd, p, size);
- close(fd);
- munmap(p, size);
- success("OK");
-
- printf("Opening %s read only for collapse...", finfo.path);
- finfo.fd = open(finfo.path, O_RDONLY, 777);
- if (finfo.fd < 0) {
- perror("open()");
+ if (ftruncate(fd, size)) {
+ perror("ftruncate()");
exit(EXIT_FAILURE);
}
- p = mmap(BASE_ADDR, size, PROT_READ,
- MAP_PRIVATE, finfo.fd, 0);
- if (p == MAP_FAILED || p != BASE_ADDR) {
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (p != BASE_ADDR) {
perror("mmap()");
exit(EXIT_FAILURE);
}
+ fill_memory(p, 0, size);
+ if (msync(p, size, MS_SYNC)) {
+ perror("msync()");
+ exit(EXIT_FAILURE);
+ }
+ close(fd);
+ munmap(p, size);
+ success("OK");
+ ksft_print_msg("Opening %s %s for collapse...", finfo.path,
+ setup == FILE_SETUP_READ_ONLY_FS ? "read-only" :
+ setup == FILE_SETUP_READ_WRITE_FS_READ_DATA ?
+ "read-write (read)" :
+ "read-write (write)");
+ finfo.fd = open(finfo.path, open_opt, 777);
+ if (finfo.fd < 0)
+ ksft_exit_fail_perror("open()");
+ p = mmap(BASE_ADDR, size, mmap_prot, MAP_SHARED, finfo.fd, 0);
+ if (p == MAP_FAILED || p != BASE_ADDR)
+ ksft_exit_fail_perror("mmap()");
/* Drop page cache */
write_file("/proc/sys/vm/drop_caches", "3", 2);
@@ -407,6 +368,21 @@ static void *file_setup_area(int nr_hpages)
return p;
}
+static void *file_setup_read_only_area(int nr_hpages)
+{
+ return file_setup_area_common(nr_hpages, FILE_SETUP_READ_ONLY_FS);
+}
+
+static void *file_setup_read_write_fs_read_area(int nr_hpages)
+{
+ return file_setup_area_common(nr_hpages, FILE_SETUP_READ_WRITE_FS_READ_DATA);
+}
+
+static void *file_setup_read_write_fs_write_area(int nr_hpages)
+{
+ return file_setup_area_common(nr_hpages, FILE_SETUP_READ_WRITE_FS_WRITE_DATA);
+}
+
static void file_cleanup_area(void *p, unsigned long size)
{
munmap(p, size);
@@ -414,12 +390,26 @@ static void file_cleanup_area(void *p, unsigned long size)
unlink(finfo.path);
}
-static void file_fault(void *p, unsigned long start, unsigned long end)
+static void file_fault_read(void *p, unsigned long start, unsigned long end)
{
- if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
- perror("madvise(MADV_POPULATE_READ");
- exit(EXIT_FAILURE);
- }
+ if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ))
+ ksft_exit_fail_perror("madvise(MADV_POPULATE_READ)");
+}
+
+static void file_fault_read_and_flush(void *p, unsigned long start, unsigned long end)
+{
+ file_fault_read(p, start, end);
+ /*
+ * make folio clean, since dirty folios from read&write file are
+ * rejected and not flushed
+ */
+ msync((char *)p + start, end - start, MS_SYNC);
+}
+
+static void file_fault_write(void *p, unsigned long start, unsigned long end)
+{
+ if (madvise(((char *)p) + start, end - start, MADV_POPULATE_WRITE))
+ ksft_exit_fail_perror("madvise(MADV_POPULATE_WRITE)");
}
static bool file_check_huge(void *addr, int nr_hpages)
@@ -441,20 +431,14 @@ static void *shmem_setup_area(int nr_hpages)
unsigned long size = nr_hpages * hpage_pmd_size;
finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
- if (finfo.fd < 0) {
- perror("memfd_create()");
- exit(EXIT_FAILURE);
- }
- if (ftruncate(finfo.fd, size)) {
- perror("ftruncate()");
- exit(EXIT_FAILURE);
- }
+ if (finfo.fd < 0)
+ ksft_exit_fail_perror("memfd_create()");
+ if (ftruncate(finfo.fd, size))
+ ksft_exit_fail_perror("ftruncate()");
p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
0);
- if (p != BASE_ADDR) {
- perror("mmap()");
- exit(EXIT_FAILURE);
- }
+ if (p != BASE_ADDR)
+ ksft_exit_fail_perror("mmap()");
return p;
}
@@ -477,10 +461,26 @@ static struct mem_ops __anon_ops = {
.name = "anon",
};
-static struct mem_ops __file_ops = {
- .setup_area = &file_setup_area,
+static struct mem_ops __read_only_file_ops = {
+ .setup_area = &file_setup_read_only_area,
+ .cleanup_area = &file_cleanup_area,
+ .fault = &file_fault_read,
+ .check_huge = &file_check_huge,
+ .name = "file",
+};
+
+static struct mem_ops __read_write_file_read_ops = {
+ .setup_area = &file_setup_read_write_fs_read_area,
+ .cleanup_area = &file_cleanup_area,
+ .fault = &file_fault_read_and_flush,
+ .check_huge = &file_check_huge,
+ .name = "file",
+};
+
+static struct mem_ops __read_write_file_write_ops = {
+ .setup_area = &file_setup_read_write_fs_write_area,
.cleanup_area = &file_cleanup_area,
- .fault = &file_fault,
+ .fault = &file_fault_write,
.check_huge = &file_check_huge,
.name = "file",
};
@@ -493,13 +493,32 @@ static struct mem_ops __shmem_ops = {
.name = "shmem",
};
+static bool is_tmpfs(struct mem_ops *ops)
+{
+ return (ops == &__read_only_file_ops ||
+ ops == &__read_write_file_read_ops ||
+ ops == &__read_write_file_write_ops) &&
+ finfo.type == VMA_SHMEM;
+}
+
+static bool is_anon(struct mem_ops *ops)
+{
+ return ops == &__anon_ops;
+}
+
static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
struct mem_ops *ops, bool expect)
{
int ret;
struct thp_settings settings = *thp_current_settings();
- printf("%s...", msg);
+ ksft_print_msg("%s...", msg);
+
+ /*
+ * read&write file collapse succeeds for MADV_COLLAPSE because dirty
+ * folios are written back after collapse fails for dirty folios and
+ * another collapse is attempted.
+ */
/*
* Prevent khugepaged interference and tests that MADV_COLLAPSE
@@ -526,10 +545,8 @@ static void madvise_collapse(const char *msg, char *p, int nr_hpages,
struct mem_ops *ops, bool expect)
{
/* Sanity check */
- if (!ops->check_huge(p, 0)) {
- printf("Unexpected huge page\n");
- exit(EXIT_FAILURE);
- }
+ if (!ops->check_huge(p, 0))
+ ksft_exit_fail_msg("Unexpected huge page\n");
__madvise_collapse(msg, p, nr_hpages, ops, expect);
}
@@ -541,17 +558,15 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
int timeout = 6; /* 3 seconds */
/* Sanity check */
- if (!ops->check_huge(p, 0)) {
- printf("Unexpected huge page\n");
- exit(EXIT_FAILURE);
- }
+ if (!ops->check_huge(p, 0))
+ ksft_exit_fail_msg("Unexpected huge page\n");
madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
/* Wait until the second full_scan completed */
full_scans = thp_read_num("khugepaged/full_scans") + 2;
- printf("%s...", msg);
+ ksft_print_msg("%s...", msg);
while (timeout--) {
if (ops->check_huge(p, nr_hpages))
break;
@@ -567,6 +582,13 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
struct mem_ops *ops, bool expect)
{
+ /*
+ * read&write file collapse fails since khugepaged does not flush
+ * the target dirty folios
+ */
+ if (!is_tmpfs(ops) && ops == &__read_write_file_write_ops)
+ expect = false;
+
if (wait_for_scan(msg, p, nr_hpages, ops)) {
if (expect)
fail("Timeout");
@@ -601,16 +623,6 @@ static struct collapse_context __madvise_context = {
.name = "madvise",
};
-static bool is_tmpfs(struct mem_ops *ops)
-{
- return ops == &__file_ops && finfo.type == VMA_SHMEM;
-}
-
-static bool is_anon(struct mem_ops *ops)
-{
- return ops == &__anon_ops;
-}
-
static void alloc_at_fault(void)
{
struct thp_settings settings = *thp_current_settings();
@@ -621,7 +633,7 @@ static void alloc_at_fault(void)
p = alloc_mapping(1);
*p = 1;
- printf("Allocate huge page on fault...");
+ ksft_print_msg("Allocate huge page on fault...");
if (check_huge_anon(p, 1, hpage_pmd_size))
success("OK");
else
@@ -630,12 +642,14 @@ static void alloc_at_fault(void)
thp_pop_settings();
madvise(p, page_size, MADV_DONTNEED);
- printf("Split huge PMD on MADV_DONTNEED...");
+ ksft_print_msg("Split huge PMD on MADV_DONTNEED...");
if (check_huge_anon(p, 0, hpage_pmd_size))
success("OK");
else
fail("Fail");
munmap(p, hpage_pmd_size);
+
+ ksft_test_result_report(exit_status, "allocate on fault and split\n");
}
static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
@@ -650,6 +664,8 @@ static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
ops, true);
validate_memory(p, 0, size);
ops->cleanup_area(p, size);
+
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
@@ -659,6 +675,7 @@ static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
p = ops->setup_area(1);
c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
@@ -670,6 +687,7 @@ static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops
c->collapse("Collapse PTE table with single PTE entry present", p,
1, ops, true);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
@@ -697,6 +715,9 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o
validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
if (c->enforce_pte_scan_limits) {
+ ops->cleanup_area(p, hpage_pmd_size);
+ p = ops->setup_area(1);
+
ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
true);
@@ -706,6 +727,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o
skip:
ops->cleanup_area(p, hpage_pmd_size);
thp_pop_settings();
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
@@ -715,11 +737,9 @@ static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_op
p = ops->setup_area(1);
ops->fault(p, 0, hpage_pmd_size);
- printf("Swapout one page...");
- if (madvise(p, page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
+ ksft_print_msg("Swapout one page...");
+ if (madvise(p, page_size, MADV_PAGEOUT))
+ ksft_exit_fail_perror("madvise(MADV_PAGEOUT)");
if (check_swap(p, page_size)) {
success("OK");
} else {
@@ -732,6 +752,7 @@ static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_op
validate_memory(p, 0, hpage_pmd_size);
out:
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
@@ -742,11 +763,9 @@ static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *o
p = ops->setup_area(1);
ops->fault(p, 0, hpage_pmd_size);
- printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
- if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
+ ksft_print_msg("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
+ if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT))
+ ksft_exit_fail_perror("madvise(MADV_PAGEOUT)");
if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
success("OK");
} else {
@@ -760,12 +779,10 @@ static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *o
if (c->enforce_pte_scan_limits) {
ops->fault(p, 0, hpage_pmd_size);
- printf("Swapout %d of %d pages...", max_ptes_swap,
+ ksft_print_msg("Swapout %d of %d pages...", max_ptes_swap,
hpage_pmd_nr);
- if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT))
+ ksft_exit_fail_perror("madvise(MADV_PAGEOUT)");
if (check_swap(p, max_ptes_swap * page_size)) {
success("OK");
} else {
@@ -779,6 +796,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *o
}
out:
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
@@ -795,7 +813,7 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c, struc
}
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- printf("Split huge page leaving single PTE mapping compound page...");
+ ksft_print_msg("Split huge page leaving single PTE mapping compound page...");
madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
if (ops->check_huge(p, 0))
success("OK");
@@ -807,6 +825,7 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c, struc
validate_memory(p, 0, page_size);
skip:
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
@@ -814,7 +833,7 @@ static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops
void *p;
p = alloc_hpage(ops);
- printf("Split huge page leaving single PTE page table full of compound pages...");
+ ksft_print_msg("Split huge page leaving single PTE page table full of compound pages...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
if (ops->check_huge(p, 0))
@@ -826,6 +845,7 @@ static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops
true);
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
@@ -834,16 +854,12 @@ static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops
int i;
p = ops->setup_area(1);
+ ksft_print_msg("Construct PTE page table full of different PTE-mapped compound pages\n");
for (i = 0; i < hpage_pmd_nr; i++) {
- printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
- i + 1, hpage_pmd_nr);
-
madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
ops->fault(BASE_ADDR, 0, hpage_pmd_size);
- if (!ops->check_huge(BASE_ADDR, 1)) {
- printf("Failed to allocate huge page\n");
- exit(EXIT_FAILURE);
- }
+ if (!ops->check_huge(BASE_ADDR, 1))
+ ksft_exit_fail_msg("Failed to allocate huge page\n");
madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
p = mremap(BASE_ADDR - i * page_size,
@@ -851,20 +867,16 @@ static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops
(i + 1) * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED,
BASE_ADDR + 2 * hpage_pmd_size);
- if (p == MAP_FAILED) {
- perror("mremap+unmap");
- exit(EXIT_FAILURE);
- }
+ if (p == MAP_FAILED)
+ ksft_exit_fail_perror("mremap+unmap");
p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
(i + 1) * page_size,
(i + 1) * page_size + hpage_pmd_size,
MREMAP_MAYMOVE | MREMAP_FIXED,
BASE_ADDR - (i + 1) * page_size);
- if (p == MAP_FAILED) {
- perror("mremap+alloc");
- exit(EXIT_FAILURE);
- }
+ if (p == MAP_FAILED)
+ ksft_exit_fail_perror("mremap+alloc");
}
ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
@@ -879,6 +891,7 @@ static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
@@ -888,19 +901,16 @@ static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
p = ops->setup_area(1);
- printf("Allocate small page...");
+ ksft_print_msg("Allocate small page...");
ops->fault(p, 0, page_size);
if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- printf("Share small page over fork()...");
+ ksft_print_msg("Share small page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
- skip_settings_restore = true;
- exit_status = 0;
-
if (ops->check_huge(p, 0))
success("OK");
else
@@ -912,19 +922,20 @@ static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
validate_memory(p, 0, page_size);
ops->cleanup_area(p, hpage_pmd_size);
- exit(exit_status);
+ _exit(exit_status);
}
wait(&wstatus);
- exit_status += WEXITSTATUS(wstatus);
+ exit_status = WEXITSTATUS(wstatus);
- printf("Check if parent still has small page...");
+ ksft_print_msg("Check if parent still has small page...");
if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
validate_memory(p, 0, page_size);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
@@ -933,18 +944,15 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o
void *p;
p = alloc_hpage(ops);
- printf("Share huge page over fork()...");
+ ksft_print_msg("Share huge page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
- skip_settings_restore = true;
- exit_status = 0;
-
if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
- printf("Split huge page PMD in child process...");
+ ksft_print_msg("Split huge page PMD in child process...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
if (ops->check_huge(p, 0))
@@ -961,19 +969,20 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
- exit(exit_status);
+ _exit(exit_status);
}
wait(&wstatus);
- exit_status += WEXITSTATUS(wstatus);
+ exit_status = WEXITSTATUS(wstatus);
- printf("Check if parent still has huge page...");
+ ksft_print_msg("Check if parent still has huge page...");
if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
@@ -983,18 +992,15 @@ static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops
void *p;
p = alloc_hpage(ops);
- printf("Share huge page over fork()...");
+ ksft_print_msg("Share huge page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
- skip_settings_restore = true;
- exit_status = 0;
-
if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
- printf("Trigger CoW on page %d of %d...",
+ ksft_print_msg("Trigger CoW on page %d of %d...",
hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
if (ops->check_huge(p, 0))
@@ -1006,7 +1012,7 @@ static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops
1, ops, !c->enforce_pte_scan_limits);
if (c->enforce_pte_scan_limits) {
- printf("Trigger CoW on page %d of %d...",
+ ksft_print_msg("Trigger CoW on page %d of %d...",
hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
page_size);
@@ -1021,19 +1027,20 @@ static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
- exit(exit_status);
+ _exit(exit_status);
}
wait(&wstatus);
- exit_status += WEXITSTATUS(wstatus);
+ exit_status = WEXITSTATUS(wstatus);
- printf("Check if parent still has huge page...");
+ ksft_print_msg("Check if parent still has huge page...");
if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void madvise_collapse_existing_thps(struct collapse_context *c,
@@ -1050,6 +1057,7 @@ static void madvise_collapse_existing_thps(struct collapse_context *c,
__madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
/*
@@ -1077,6 +1085,7 @@ static void madvise_retracted_page_tables(struct collapse_context *c,
true);
validate_memory(p, 0, size);
ops->cleanup_area(p, size);
+ ksft_test_result_report(exit_status, "%s\n", __func__);
}
static void usage(void)
@@ -1086,8 +1095,8 @@ static void usage(void)
fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
- fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
- fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires a file system\n");
+ fprintf(stderr, "\twith PMD-sized large folio support\n");
fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n");
fprintf(stderr, "\n\tSupported Options:\n");
@@ -1143,20 +1152,25 @@ static void parse_test_type(int argc, char **argv)
usage();
if (!strcmp(buf, "all")) {
- file_ops = &__file_ops;
+ read_only_file_ops = &__read_only_file_ops;
+ read_write_file_read_ops = &__read_write_file_read_ops;
+ read_write_file_write_ops = &__read_write_file_write_ops;
anon_ops = &__anon_ops;
shmem_ops = &__shmem_ops;
} else if (!strcmp(buf, "anon")) {
anon_ops = &__anon_ops;
} else if (!strcmp(buf, "file")) {
- file_ops = &__file_ops;
+ read_only_file_ops = &__read_only_file_ops;
+ read_write_file_read_ops = &__read_write_file_read_ops;
+ read_write_file_write_ops = &__read_write_file_write_ops;
} else if (!strcmp(buf, "shmem")) {
shmem_ops = &__shmem_ops;
} else {
usage();
}
- if (!file_ops)
+ if (!read_only_file_ops && !read_write_file_read_ops &&
+ !read_write_file_write_ops)
return;
if (argc != 2)
@@ -1165,6 +1179,32 @@ static void parse_test_type(int argc, char **argv)
get_finfo(argv[1]);
}
+typedef void (*test_fn)(struct collapse_context *c, struct mem_ops *ops);
+
+struct test_case {
+ struct collapse_context *ctx;
+ struct mem_ops *ops;
+ const char *desc;
+ test_fn fn;
+};
+
+#define MAX_TEST_CASES 64
+static struct test_case test_cases[MAX_TEST_CASES];
+static int nr_test_cases;
+
+#define TEST(t, c, o) do { \
+ if (c && o) { \
+ if (nr_test_cases >= MAX_TEST_CASES) \
+ ksft_exit_fail_msg("MAX_TEST_CASES is too small\n"); \
+ test_cases[nr_test_cases++] = (struct test_case){ \
+ .ctx = c, \
+ .ops = o, \
+ .desc = #t, \
+ .fn = t, \
+ }; \
+ } \
+ } while (0)
+
int main(int argc, char **argv)
{
int hpage_pmd_order;
@@ -1188,10 +1228,10 @@ int main(int argc, char **argv)
.read_ahead_kb = 0,
};
- if (!thp_is_enabled()) {
- printf("Transparent Hugepages not available\n");
- return KSFT_SKIP;
- }
+ ksft_print_header();
+
+ if (!thp_is_enabled())
+ ksft_exit_skip("Transparent Hugepages not available\n");
parse_test_type(argc, argv);
@@ -1199,10 +1239,8 @@ int main(int argc, char **argv)
page_size = getpagesize();
hpage_pmd_size = read_pmd_pagesize();
- if (!hpage_pmd_size) {
- printf("Reading PMD pagesize failed");
- exit(EXIT_FAILURE);
- }
+ if (!hpage_pmd_size)
+ ksft_exit_fail_msg("Reading PMD pagesize failed\n");
hpage_pmd_nr = hpage_pmd_size / page_size;
hpage_pmd_order = __builtin_ctz(hpage_pmd_nr);
@@ -1218,47 +1256,54 @@ int main(int argc, char **argv)
save_settings();
thp_push_settings(&default_settings);
- alloc_at_fault();
-
-#define TEST(t, c, o) do { \
- if (c && o) { \
- printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
- t(c, o); \
- } \
- } while (0)
-
TEST(collapse_full, khugepaged_context, anon_ops);
- TEST(collapse_full, khugepaged_context, file_ops);
+ TEST(collapse_full, khugepaged_context, read_only_file_ops);
+ TEST(collapse_full, khugepaged_context, read_write_file_read_ops);
+ TEST(collapse_full, khugepaged_context, read_write_file_write_ops);
TEST(collapse_full, khugepaged_context, shmem_ops);
TEST(collapse_full, madvise_context, anon_ops);
- TEST(collapse_full, madvise_context, file_ops);
+ TEST(collapse_full, madvise_context, read_only_file_ops);
+ TEST(collapse_full, madvise_context, read_write_file_read_ops);
+ TEST(collapse_full, madvise_context, read_write_file_write_ops);
TEST(collapse_full, madvise_context, shmem_ops);
TEST(collapse_empty, khugepaged_context, anon_ops);
TEST(collapse_empty, madvise_context, anon_ops);
TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
- TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, read_only_file_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, read_write_file_read_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, read_write_file_write_ops);
TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
TEST(collapse_single_pte_entry, madvise_context, anon_ops);
- TEST(collapse_single_pte_entry, madvise_context, file_ops);
+ TEST(collapse_single_pte_entry, madvise_context, read_only_file_ops);
+ TEST(collapse_single_pte_entry, madvise_context, read_write_file_read_ops);
+ TEST(collapse_single_pte_entry, madvise_context, read_write_file_write_ops);
TEST(collapse_single_pte_entry, madvise_context, shmem_ops);
TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
- TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, read_only_file_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, read_write_file_read_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, read_write_file_write_ops);
TEST(collapse_max_ptes_none, madvise_context, anon_ops);
- TEST(collapse_max_ptes_none, madvise_context, file_ops);
+ TEST(collapse_max_ptes_none, madvise_context, read_only_file_ops);
+ TEST(collapse_max_ptes_none, madvise_context, read_write_file_read_ops);
+ TEST(collapse_max_ptes_none, madvise_context, read_write_file_write_ops);
TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
- TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, read_only_file_ops);
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, read_write_file_read_ops);
TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
- TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, read_only_file_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, read_write_file_read_ops);
TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
- TEST(collapse_full_of_compound, khugepaged_context, file_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, read_only_file_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, read_write_file_read_ops);
TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
TEST(collapse_full_of_compound, madvise_context, anon_ops);
- TEST(collapse_full_of_compound, madvise_context, file_ops);
+ TEST(collapse_full_of_compound, madvise_context, read_only_file_ops);
+ TEST(collapse_full_of_compound, madvise_context, read_write_file_read_ops);
TEST(collapse_full_of_compound, madvise_context, shmem_ops);
TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
@@ -1280,11 +1325,23 @@ int main(int argc, char **argv)
TEST(collapse_max_ptes_shared, madvise_context, anon_ops);
TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
- TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, read_only_file_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, read_write_file_read_ops);
TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);
- TEST(madvise_retracted_page_tables, madvise_context, file_ops);
+ TEST(madvise_retracted_page_tables, madvise_context, read_only_file_ops);
+ TEST(madvise_retracted_page_tables, madvise_context, read_write_file_read_ops);
TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);
- restore_settings(0);
+ ksft_set_plan(nr_test_cases + 1);
+
+ alloc_at_fault();
+ for (int i = 0; i < nr_test_cases; i++) {
+ struct test_case *t = &test_cases[i];
+
+ ksft_print_msg("\n# Run test: %s (%s:%s)\n", t->desc, t->ctx->name, t->ops->name);
+ t->fn(t->ctx, t->ops);
+ }
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/ksft_kmemleak_dedup.sh b/tools/testing/selftests/mm/ksft_kmemleak_dedup.sh
new file mode 100755
index 000000000000..d01950244490
--- /dev/null
+++ b/tools/testing/selftests/mm/ksft_kmemleak_dedup.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression test for kmemleak's per-scan verbose dedup.
+#
+# Loads samples/kmemleak's helper module to generate orphan allocations
+# (some of which share an allocation backtrace), runs a few kmemleak
+# scans with verbose printing enabled, and verifies that no two
+# "unreferenced object" reports within a single scan share the same
+# backtrace - which would mean dedup failed to collapse them.
+#
+# This test is intentionally permissive: the kmemleak-test module's
+# leaks frequently get reported across many separate scans (per-CPU
+# chunk reuse, slab freelist pointers, kernel stack residue), so dedup
+# may never have anything to fold within one scan. That is not a
+# regression. The test only fails when it actually catches dedup not
+# happening on input that should have triggered it - i.e. two reports
+# with identical backtraces in the same scan.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+ksft_skip=4
+KMEMLEAK=/sys/kernel/debug/kmemleak
+VERBOSE_PARAM=/sys/module/kmemleak/parameters/verbose
+MODULE=kmemleak-test
+
+skip() {
+ echo "SKIP: $*"
+ exit $ksft_skip
+}
+
+fail() {
+ echo "FAIL: $*"
+ exit 1
+}
+
+pass() {
+ echo "PASS: $*"
+ exit 0
+}
+
+[ "$(id -u)" -eq 0 ] || skip "must run as root"
+[ -r "$KMEMLEAK" ] || skip "no kmemleak debugfs (CONFIG_DEBUG_KMEMLEAK)"
+[ -w "$VERBOSE_PARAM" ] || skip "kmemleak verbose param missing"
+modinfo "$MODULE" >/dev/null 2>&1 ||
+ skip "$MODULE not built (CONFIG_SAMPLE_KMEMLEAK)"
+
+# The verdict depends entirely on dmesg contents, so a silently-empty
+# dmesg (dmesg_restrict=1 with CAP_SYSLOG dropped, restricted container,
+# etc.) would let the script report PASS without parsing anything. Probe
+# both read and clear up front and skip cleanly if either is denied.
+dmesg >/dev/null 2>&1 ||
+ skip "cannot read dmesg (need CAP_SYSLOG or dmesg_restrict=0)"
+dmesg -C >/dev/null 2>&1 ||
+ skip "cannot clear dmesg (need CAP_SYSLOG or dmesg_restrict=0)"
+
+# kmemleak can be present but disabled at runtime (boot arg kmemleak=off,
+# or it self-disabled after an internal error). In that state writes other
+# than "clear" return EPERM, so probe once and skip if so.
+if ! echo scan > "$KMEMLEAK" 2>/dev/null; then
+ skip "kmemleak is disabled (check dmesg or kmemleak= boot arg)"
+fi
+
+prev_verbose=$(cat "$VERBOSE_PARAM")
+# shellcheck disable=SC2317 # invoked indirectly via trap
+cleanup() {
+ echo "$prev_verbose" > "$VERBOSE_PARAM" 2>/dev/null
+ rmmod "$MODULE" 2>/dev/null
+ # Drain the leak set we generated. Subsequent selftests (e.g.
+ # tools/testing/selftests/net/netfilter/nft_interface_stress.sh)
+ # fail on any non-empty kmemleak report, so leaving the helper
+ # module's intentional leaks behind would poison the rest of a
+ # kselftest run.
+ #
+ # Caveat: kmemleak_clear() only greys objects that have already
+ # been reported (OBJECT_REPORTED && unreferenced_object()). Helper
+ # allocations that stayed "still referenced" throughout the test
+ # (stale pointers in per-CPU chunks, slab freelists, kernel stacks)
+ # were never reported and are therefore not greyed by this clear -
+ # they remain tracked and a later scan can still surface them. Such
+ # leftovers are inherent to the kmemleak-test sample module and are
+ # not specific to this test; consumers that fail on any kmemleak
+ # output (rather than on the test-specific backtraces) need to be
+ # robust to that, or this test should be excluded from the run.
+ echo clear > "$KMEMLEAK" 2>/dev/null
+}
+trap cleanup EXIT
+
+echo 1 > "$VERBOSE_PARAM"
+
+# Drain the existing leak set so the next scan only reports our objects.
+echo clear > "$KMEMLEAK"
+
+# Re-clear dmesg now (the up-front probe also cleared it, but anything
+# logged between then and here - module unload chatter, the probe scan,
+# the verbose-param write - would otherwise pollute the parse window).
+dmesg -C >/dev/null
+
+# If the module was left loaded by a previous aborted run, modprobe would
+# be a no-op and the init function would not run, so no new leaks would be
+# generated. Force a clean state first.
+rmmod "$MODULE" 2>/dev/null
+modprobe "$MODULE" || skip "failed to load $MODULE"
+# Removing the module orphans the list elements without freeing them.
+rmmod "$MODULE" || skip "failed to unload $MODULE"
+
+# Run a handful of scans so kmemleak has the chance to age and report
+# the orphans. We do not require any particular number to be reported:
+# the regression check below operates on whatever lands in dmesg.
+#
+# Note: with CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y the kernel's own scan
+# thread can report and mark these orphans (OBJECT_REPORTED) before our
+# manual scans run, after which our scans will see nothing. The
+# lower-bound check below catches the case where that happens and the
+# manual scans also produce nothing.
+SCAN_COUNT=4
+SCAN_SLEEP=6
+for _ in $(seq 1 "$SCAN_COUNT"); do
+ echo scan > "$KMEMLEAK"
+ sleep "$SCAN_SLEEP"
+done
+
+# Strip the leading "[ nnn.nnnnnn] " dmesg timestamp prefix. Without
+# this, two identical stack frames printed from two reports in the same
+# scan would produce different per-frame strings (different timestamps)
+# and the duplicate-backtrace check below would not match them, silently
+# passing a real dedup regression. Doing the strip here makes the rest
+# of the parser timestamp-agnostic regardless of what dmesg defaults to.
+log=$(dmesg | sed 's/^\[[^]]*\] //')
+
+# After running the workload (modprobe + scans), dmesg should contain at
+# least the helper module's pr_info lines and our manual-scan output. An
+# empty capture here means dmesg succeeded earlier but is now denying us
+# the buffer (race with dmesg_restrict toggling, etc.); refuse to give a
+# verdict on no evidence.
+[ -n "$log" ] || skip "dmesg returned empty after running workload"
+
+# Lower bound: if kmemleak's own per-scan tally counted leaks but the
+# verbose path emitted no "unreferenced object" line, the verbose printer
+# itself is regressed - fail rather than silently passing on no input.
+new_leaks=$(echo "$log" |
+ sed -n 's/.*kmemleak: \([0-9]\+\) new suspected.*/\1/p' |
+ awk '{s+=$1} END{print s+0}')
+printed=$(echo "$log" | grep -c 'kmemleak: unreferenced object')
+if [ "$new_leaks" -gt 0 ] && [ "$printed" -eq 0 ]; then
+ fail "verbose path broken: $new_leaks leaks counted, 0 printed in $SCAN_COUNT scans"
+fi
+
+# Walk the log: split into per-scan chunks at "N new suspected memory
+# leaks" boundaries; within each chunk, capture each "unreferenced
+# object" report's backtrace and check that no backtrace is reported
+# more than once. A duplicate within a single scan means dedup failed
+# to collapse two leaks that share an allocation site.
+violations=$(echo "$log" | awk '
+ function flush_block() {
+ if (in_block) {
+ # Skip empty backtraces: leaks with trace_handle == 0
+ # (early-boot allocations or stack_depot_save() failures
+ # under memory pressure) are intentionally not deduped,
+ # so multiple such reports in one scan are expected and
+ # must not be flagged as a regression.
+ if (bt != "")
+ seen[bt]++
+ in_block = 0
+ collecting = 0
+ bt = ""
+ }
+ }
+ function check_and_reset( b) {
+ for (b in seen)
+ if (seen[b] > 1)
+ printf("backtrace seen %d times in one scan:\n%s\n",
+ seen[b], b)
+ delete seen
+ }
+ # Scan boundary: the per-scan summary line.
+ /kmemleak: [0-9]+ new suspected memory leaks/ {
+ flush_block()
+ check_and_reset()
+ next
+ }
+ # Start of a new "unreferenced object" report.
+ /kmemleak: unreferenced object/ {
+ flush_block()
+ in_block = 1
+ next
+ }
+ # Inside a report, the "backtrace (crc ...):" line switches us to
+ # backtrace-collecting mode.
+ in_block && /kmemleak:[[:space:]]+backtrace \(crc/ {
+ collecting = 1
+ next
+ }
+ # Once collecting, capture only deeply-indented "kmemleak: " lines
+ # (stack frames have 4+ spaces of indentation under "kmemleak: ";
+ # headers and the "... and N more" tail line have less). This stops
+ # unrelated kmemleak warns landing between reports from being lumped
+ # into the backtrace key, which would mask a genuine duplicate.
+ in_block && collecting && /kmemleak:[[:space:]]{4,}/ {
+ bt = bt $0 "\n"
+ next
+ }
+ END {
+ flush_block()
+ check_and_reset()
+ }
+')
+
+if [ -n "$violations" ]; then
+ echo "$violations"
+ fail "kmemleak dedup regression: same backtrace reported more than once in a single scan"
+fi
+
+# Count the dedup summary lines so the report distinguishes "dedup
+# actually fired" from "no same-backtrace leaks turned up to dedup".
+dedup_lines=$(echo "$log" | grep -c 'more object(s) with the same backtrace')
+
+if [ "$dedup_lines" -gt 0 ]; then
+ pass "no dedup violations across $SCAN_COUNT scans; dedup fired ($dedup_lines summary line(s) observed)"
+else
+ pass "no dedup violations across $SCAN_COUNT scans; dedup had nothing to collapse"
+fi
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 8d874c4754f3..31c06c72203f 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -498,6 +498,7 @@ static void test_prctl_fork(void)
static int start_ksmd_and_set_frequency(char *pages_to_scan, char *sleep_ms)
{
int ksm_fd;
+ size_t len;
ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
if (ksm_fd < 0)
@@ -506,11 +507,13 @@ static int start_ksmd_and_set_frequency(char *pages_to_scan, char *sleep_ms)
if (write(ksm_fd, "1", 1) != 1)
return -errno;
- if (write(pages_to_scan_fd, pages_to_scan, strlen(pages_to_scan)) <= 0)
- return -errno;
+ len = strlen(pages_to_scan);
+ if (write(pages_to_scan_fd, pages_to_scan, len) != len)
+ return -1;
- if (write(sleep_millisecs_fd, sleep_ms, strlen(sleep_ms)) <= 0)
- return -errno;
+ len = strlen(sleep_ms);
+ if (write(sleep_millisecs_fd, sleep_ms, len) != len)
+ return -1;
return 0;
}
@@ -526,11 +529,11 @@ static int stop_ksmd_and_restore_frequency(void)
if (write(ksm_fd, "2", 1) != 1)
return -errno;
- if (write(pages_to_scan_fd, "100", 3) <= 0)
- return -errno;
+ if (write(pages_to_scan_fd, "100", 3) != 3)
+ return -1;
- if (write(sleep_millisecs_fd, "20", 2) <= 0)
- return -errno;
+ if (write(sleep_millisecs_fd, "20", 2) != 2)
+ return -1;
return 0;
}
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index a0b48b839d54..a050f4840cfa 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -15,7 +15,7 @@
#include "kselftest.h"
#include <include/vdso/time64.h>
#include "vm_util.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
#define KSM_FP(s) (KSM_SYSFS_PATH s)
@@ -174,13 +174,13 @@ static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_
{
void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
- if (!map_ptr) {
- perror("mmap");
+ if (map_ptr == MAP_FAILED) {
+ ksft_perror("mmap");
return NULL;
}
memset(map_ptr, data, map_size);
if (mprotect(map_ptr, map_size, prot)) {
- perror("mprotect");
+ ksft_perror("mprotect");
munmap(map_ptr, map_size);
return NULL;
}
@@ -201,11 +201,11 @@ static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
return 1;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return 1;
}
if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
- printf("Scan time limit exceeded\n");
+ ksft_print_msg("Scan time limit exceeded\n");
return 1;
}
}
@@ -218,12 +218,12 @@ static int ksm_merge_pages(int merge_type, void *addr, size_t size,
{
if (merge_type == KSM_MERGE_MADVISE) {
if (madvise(addr, size, MADV_MERGEABLE)) {
- perror("madvise");
+ ksft_perror("madvise");
return 1;
}
} else if (merge_type == KSM_MERGE_PRCTL) {
if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0)) {
- perror("prctl");
+ ksft_perror("prctl");
return 1;
}
}
@@ -242,7 +242,7 @@ static int ksm_unmerge_pages(void *addr, size_t size,
struct timespec start_time, int timeout)
{
if (madvise(addr, size, MADV_UNMERGEABLE)) {
- perror("madvise");
+ ksft_perror("madvise");
return 1;
}
return 0;
@@ -324,7 +324,7 @@ static int check_ksm_merge(int merge_type, int mapping, int prot,
struct timespec start_time;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return KSFT_FAIL;
}
@@ -338,7 +338,6 @@ static int check_ksm_merge(int merge_type, int mapping, int prot,
/* verify that the right number of pages are merged */
if (assert_ksm_pages_count(page_count)) {
- printf("OK\n");
munmap(map_ptr, page_size * page_count);
if (merge_type == KSM_MERGE_PRCTL)
prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
@@ -346,7 +345,6 @@ static int check_ksm_merge(int merge_type, int mapping, int prot,
}
err_out:
- printf("Not OK\n");
munmap(map_ptr, page_size * page_count);
return KSFT_FAIL;
}
@@ -358,7 +356,7 @@ static int check_ksm_unmerge(int merge_type, int mapping, int prot, int timeout,
int page_count = 2;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return KSFT_FAIL;
}
@@ -380,13 +378,11 @@ static int check_ksm_unmerge(int merge_type, int mapping, int prot, int timeout,
/* check that unmerging was successful and 0 pages are currently merged */
if (assert_ksm_pages_count(0)) {
- printf("OK\n");
munmap(map_ptr, page_size * page_count);
return KSFT_PASS;
}
err_out:
- printf("Not OK\n");
munmap(map_ptr, page_size * page_count);
return KSFT_FAIL;
}
@@ -398,7 +394,7 @@ static int check_ksm_zero_page_merge(int merge_type, int mapping, int prot, long
struct timespec start_time;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return KSFT_FAIL;
}
@@ -425,12 +421,10 @@ static int check_ksm_zero_page_merge(int merge_type, int mapping, int prot, long
else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
goto err_out;
- printf("OK\n");
munmap(map_ptr, page_size * page_count);
return KSFT_PASS;
err_out:
- printf("Not OK\n");
munmap(map_ptr, page_size * page_count);
return KSFT_FAIL;
}
@@ -465,16 +459,16 @@ static int check_ksm_numa_merge(int merge_type, int mapping, int prot, int timeo
int first_node;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return KSFT_FAIL;
}
if (numa_available() < 0) {
- perror("NUMA support not enabled");
+ ksft_print_msg("NUMA support not enabled\n");
return KSFT_SKIP;
}
if (numa_num_configured_nodes() <= 1) {
- printf("At least 2 NUMA nodes must be available\n");
+ ksft_print_msg("At least 2 NUMA nodes must be available\n");
return KSFT_SKIP;
}
if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
@@ -485,7 +479,7 @@ static int check_ksm_numa_merge(int merge_type, int mapping, int prot, int timeo
numa1_map_ptr = numa_alloc_onnode(page_size, first_node);
numa2_map_ptr = numa_alloc_onnode(page_size, get_next_mem_node(first_node));
if (!numa1_map_ptr || !numa2_map_ptr) {
- perror("numa_alloc_onnode");
+ ksft_perror("numa_alloc_onnode");
return KSFT_FAIL;
}
@@ -510,13 +504,11 @@ static int check_ksm_numa_merge(int merge_type, int mapping, int prot, int timeo
numa_free(numa1_map_ptr, page_size);
numa_free(numa2_map_ptr, page_size);
- printf("OK\n");
return KSFT_PASS;
err_out:
numa_free(numa1_map_ptr, page_size);
numa_free(numa2_map_ptr, page_size);
- printf("Not OK\n");
return KSFT_FAIL;
}
@@ -529,7 +521,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
int pagemap_fd, n_normal_pages, n_huge_pages;
if (!thp_is_enabled()) {
- printf("Transparent Hugepages not available\n");
+ ksft_print_msg("Transparent Hugepages not available\n");
return KSFT_SKIP;
}
@@ -559,36 +551,35 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
else
n_huge_pages++;
}
- printf("Number of normal pages: %d\n", n_normal_pages);
- printf("Number of huge pages: %d\n", n_huge_pages);
+ ksft_print_msg("Number of normal pages: %d\n", n_normal_pages);
+ ksft_print_msg("Number of huge pages: %d\n", n_huge_pages);
memset(map_ptr, '*', len);
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
goto err_out;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
(end_time.tv_nsec - start_time.tv_nsec);
- printf("Total size: %lu MiB\n", map_size / MB);
- printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ ksft_print_msg("Total size: %lu MiB\n", map_size / MB);
+ ksft_print_msg("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
scan_time_ns % NSEC_PER_SEC);
- printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ksft_print_msg("Average speed: %.3f MiB/s\n", (map_size / MB) /
((double)scan_time_ns / NSEC_PER_SEC));
munmap(map_ptr_orig, len + HPAGE_SIZE);
return KSFT_PASS;
err_out:
- printf("Not OK\n");
munmap(map_ptr_orig, len + HPAGE_SIZE);
return KSFT_FAIL;
}
@@ -606,30 +597,29 @@ static int ksm_merge_time(int merge_type, int mapping, int prot, int timeout, si
return KSFT_FAIL;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
goto err_out;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
(end_time.tv_nsec - start_time.tv_nsec);
- printf("Total size: %lu MiB\n", map_size / MB);
- printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ ksft_print_msg("Total size: %lu MiB\n", map_size / MB);
+ ksft_print_msg("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
scan_time_ns % NSEC_PER_SEC);
- printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ksft_print_msg("Average speed: %.3f MiB/s\n", (map_size / MB) /
((double)scan_time_ns / NSEC_PER_SEC));
munmap(map_ptr, map_size);
return KSFT_PASS;
err_out:
- printf("Not OK\n");
munmap(map_ptr, map_size);
return KSFT_FAIL;
}
@@ -646,37 +636,36 @@ static int ksm_unmerge_time(int merge_type, int mapping, int prot, int timeout,
if (!map_ptr)
return KSFT_FAIL;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
goto err_out;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout))
goto err_out;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
(end_time.tv_nsec - start_time.tv_nsec);
- printf("Total size: %lu MiB\n", map_size / MB);
- printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ ksft_print_msg("Total size: %lu MiB\n", map_size / MB);
+ ksft_print_msg("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
scan_time_ns % NSEC_PER_SEC);
- printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ksft_print_msg("Average speed: %.3f MiB/s\n", (map_size / MB) /
((double)scan_time_ns / NSEC_PER_SEC));
munmap(map_ptr, map_size);
return KSFT_PASS;
err_out:
- printf("Not OK\n");
munmap(map_ptr, map_size);
return KSFT_FAIL;
}
@@ -695,24 +684,24 @@ static int ksm_cow_time(int merge_type, int mapping, int prot, int timeout, size
return KSFT_FAIL;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return KSFT_FAIL;
}
for (size_t i = 0; i < page_count - 1; i = i + 2)
memset(map_ptr + page_size * i, '-', 1);
if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
return KSFT_FAIL;
}
cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
(end_time.tv_nsec - start_time.tv_nsec);
- printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
- printf("Not merged pages:\n");
- printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ ksft_print_msg("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
+ ksft_print_msg("Not merged pages:\n");
+ ksft_print_msg("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
cow_time_ns % NSEC_PER_SEC);
- printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+ ksft_print_msg("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
((double)cow_time_ns / NSEC_PER_SEC));
/* Create 2000 pairs of duplicate pages */
@@ -724,30 +713,29 @@ static int ksm_cow_time(int merge_type, int mapping, int prot, int timeout, size
goto err_out;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
for (size_t i = 0; i < page_count - 1; i = i + 2)
memset(map_ptr + page_size * i, '-', 1);
if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
- perror("clock_gettime");
+ ksft_perror("clock_gettime");
goto err_out;
}
cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
(end_time.tv_nsec - start_time.tv_nsec);
- printf("Merged pages:\n");
- printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ ksft_print_msg("Merged pages:\n");
+ ksft_print_msg("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
cow_time_ns % NSEC_PER_SEC);
- printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+ ksft_print_msg("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
((double)cow_time_ns / NSEC_PER_SEC));
munmap(map_ptr, page_size * page_count);
return KSFT_PASS;
err_out:
- printf("Not OK\n");
munmap(map_ptr, page_size * page_count);
return KSFT_FAIL;
}
@@ -765,6 +753,10 @@ int main(int argc, char *argv[])
bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
long size_MB = 0;
+ const char *test_descr = "KSM merging";
+
+ ksft_print_header();
+ ksft_set_plan(1);
while ((opt = getopt(argc, argv, "dha:p:l:z:m:s:t:MUZNPCHD")) != -1) {
switch (opt) {
@@ -773,17 +765,13 @@ int main(int argc, char *argv[])
break;
case 'p':
page_count = atol(optarg);
- if (page_count <= 0) {
- printf("The number of pages must be greater than 0\n");
- return KSFT_FAIL;
- }
+ if (page_count <= 0)
+ ksft_exit_fail_msg("The number of pages must be greater than 0\n");
break;
case 'l':
ksm_scan_limit_sec = atoi(optarg);
- if (ksm_scan_limit_sec <= 0) {
- printf("Timeout value must be greater than 0\n");
- return KSFT_FAIL;
- }
+ if (ksm_scan_limit_sec <= 0)
+ ksft_exit_fail_msg("Timeout value must be greater than 0\n");
break;
case 'h':
print_help();
@@ -805,19 +793,15 @@ int main(int argc, char *argv[])
break;
case 's':
size_MB = atoi(optarg);
- if (size_MB <= 0) {
- printf("Size must be greater than 0\n");
- return KSFT_FAIL;
- }
+ if (size_MB <= 0)
+ ksft_exit_fail_msg("Size must be greater than 0\n");
break;
case 't':
{
int tmp = atoi(optarg);
- if (tmp < 0 || tmp > KSM_MERGE_LAST) {
- printf("Invalid merge type\n");
- return KSFT_FAIL;
- }
+ if (tmp < 0 || tmp > KSM_MERGE_LAST)
+ ksft_exit_fail_msg("Invalid merge type\n");
merge_type = tmp;
}
break;
@@ -845,82 +829,80 @@ int main(int argc, char *argv[])
test_name = KSM_COW_TIME;
break;
default:
- return KSFT_FAIL;
+ ksft_exit_fail_msg("Unknown option\n");
}
}
if (prot == 0)
prot = str_to_prot(KSM_PROT_STR_DEFAULT);
- if (access(KSM_SYSFS_PATH, F_OK)) {
- printf("Config KSM not enabled\n");
- return KSFT_SKIP;
- }
+ if (access(KSM_SYSFS_PATH, F_OK))
+ ksft_exit_skip("Config KSM not enabled\n");
- if (ksm_save_def(&ksm_sysfs_old)) {
- printf("Cannot save default tunables\n");
- return KSFT_FAIL;
- }
+ if (ksm_save_def(&ksm_sysfs_old))
+ ksft_exit_fail_msg("Cannot save default tunables\n");
if (ksm_write_sysfs(KSM_FP("run"), 2) ||
ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
numa_available() ? 0 :
ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
- return KSFT_FAIL;
+ ksft_exit_fail_msg("Cannot set up KSM tunables\n");
switch (test_name) {
case CHECK_KSM_MERGE:
+ test_descr = "KSM merging";
ret = check_ksm_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
ksm_scan_limit_sec, page_size);
break;
case CHECK_KSM_UNMERGE:
+ test_descr = "KSM unmerging";
ret = check_ksm_unmerge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
ksm_scan_limit_sec, page_size);
break;
case CHECK_KSM_ZERO_PAGE_MERGE:
+ test_descr = "KSM zero page merging";
ret = check_ksm_zero_page_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
page_count, ksm_scan_limit_sec, use_zero_pages,
page_size);
break;
case CHECK_KSM_NUMA_MERGE:
+ test_descr = "KSM NUMA merging";
ret = check_ksm_numa_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
ksm_scan_limit_sec, merge_across_nodes, page_size);
break;
case KSM_MERGE_TIME:
- if (size_MB == 0) {
- printf("Option '-s' is required.\n");
- return KSFT_FAIL;
- }
+ if (size_MB == 0)
+ ksft_exit_fail_msg("Option '-s' is required\n");
+ test_descr = "KSM merge time";
ret = ksm_merge_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
ksm_scan_limit_sec, size_MB);
break;
case KSM_MERGE_TIME_HUGE_PAGES:
- if (size_MB == 0) {
- printf("Option '-s' is required.\n");
- return KSFT_FAIL;
- }
+ if (size_MB == 0)
+ ksft_exit_fail_msg("Option '-s' is required\n");
+ test_descr = "KSM merge time with huge pages";
ret = ksm_merge_hugepages_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
ksm_scan_limit_sec, size_MB);
break;
case KSM_UNMERGE_TIME:
- if (size_MB == 0) {
- printf("Option '-s' is required.\n");
- return KSFT_FAIL;
- }
+ if (size_MB == 0)
+ ksft_exit_fail_msg("Option '-s' is required\n");
+ test_descr = "KSM unmerge time";
ret = ksm_unmerge_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
ksm_scan_limit_sec, size_MB);
break;
case KSM_COW_TIME:
+ test_descr = "KSM COW time";
ret = ksm_cow_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
ksm_scan_limit_sec, page_size);
break;
}
- if (ksm_restore(&ksm_sysfs_old)) {
- printf("Cannot restore default tunables\n");
- return KSFT_FAIL;
- }
+ if (ksm_restore(&ksm_sysfs_old))
+ ksft_print_msg("Cannot restore default tunables\n");
+
+ ksft_test_result_report(ret, "%s\n", test_descr);
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index 88050e0f829a..7fce5d0b622b 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -34,7 +34,7 @@ static void sense_support(void)
addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
- if (!addr)
+ if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ret = madvise(addr, pagesize, MADV_POPULATE_READ);
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
deleted file mode 100644
index aa409107611b..000000000000
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Example of using hugepage memory in a user application using the mmap
- * system call with MAP_HUGETLB flag. Before running this program make
- * sure the administrator has allocated enough default sized huge pages
- * to cover the 256 MB allocation.
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include "vm_util.h"
-#include "kselftest.h"
-
-#define LENGTH (256UL*1024*1024)
-#define PROTECTION (PROT_READ | PROT_WRITE)
-
-static void check_bytes(char *addr)
-{
- ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
-}
-
-static void write_bytes(char *addr, size_t length)
-{
- unsigned long i;
-
- for (i = 0; i < length; i++)
- *(addr + i) = (char)i;
-}
-
-static void read_bytes(char *addr, size_t length)
-{
- unsigned long i;
-
- check_bytes(addr);
- for (i = 0; i < length; i++)
- if (*(addr + i) != (char)i)
- ksft_exit_fail_msg("Mismatch at %lu\n", i);
-
- ksft_test_result_pass("Read correct data\n");
-}
-
-int main(int argc, char **argv)
-{
- void *addr;
- size_t hugepage_size;
- size_t length = LENGTH;
- int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
- int shift = 0;
-
- hugepage_size = default_huge_page_size();
- /* munmap with fail if the length is not page aligned */
- if (hugepage_size > length)
- length = hugepage_size;
-
- ksft_print_header();
- ksft_set_plan(1);
-
- if (argc > 1)
- length = atol(argv[1]) << 20;
- if (argc > 2) {
- shift = atoi(argv[2]);
- if (shift)
- flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
- }
-
- if (shift)
- ksft_print_msg("%u kB hugepages\n", 1 << (shift - 10));
- else
- ksft_print_msg("Default size hugepages\n");
- ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
-
- addr = mmap(NULL, length, PROTECTION, flags, -1, 0);
- if (addr == MAP_FAILED)
- ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
-
- ksft_print_msg("Returned address is %p\n", addr);
- check_bytes(addr);
- write_bytes(addr, length);
- read_bytes(addr, length);
-
- /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
- if (munmap(addr, length))
- ksft_exit_fail_msg("munmap: %s\n", strerror(errno));
-
- ksft_finished();
-}
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index 60e78bbfc0e3..29f7492453d4 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -5,7 +5,7 @@
*/
#include "kselftest_harness.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#include <strings.h>
#include <pthread.h>
@@ -23,6 +23,8 @@
#define MAX_RETRIES 100
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+HUGETLB_SETUP_DEFAULT_PAGES(1)
+
FIXTURE(migration)
{
pthread_t *threads;
@@ -32,13 +34,26 @@ FIXTURE(migration)
int n2;
};
+static void reset_signals(void)
+{
+ struct sigaction sa = { .sa_handler = SIG_DFL };
+
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGTERM, &sa, NULL);
+ sigaction(SIGHUP, &sa, NULL);
+ sigaction(SIGINT, &sa, NULL);
+ sigaction(SIGQUIT, &sa, NULL);
+}
+
FIXTURE_SETUP(migration)
{
int n;
+ reset_signals();
+
if (numa_available() < 0)
SKIP(return, "NUMA not available");
- self->nthreads = numa_num_task_cpus() - 1;
+ self->nthreads = numa_num_task_cpus() - 2;
self->n1 = -1;
self->n2 = -1;
@@ -52,6 +67,9 @@ FIXTURE_SETUP(migration)
}
}
+ if (self->nthreads < 1 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
self->threads = malloc(self->nthreads * sizeof(*self->threads));
ASSERT_NE(self->threads, NULL);
self->pids = malloc(self->nthreads * sizeof(*self->pids));
@@ -64,6 +82,29 @@ FIXTURE_TEARDOWN(migration)
free(self->pids);
}
+static bool kill_children(FIXTURE_DATA(migration) * self)
+{
+ bool err = false;
+ pid_t pid;
+ int i;
+
+ for (i = 0; i < self->nthreads; i++) {
+ int status = 0;
+
+ pid = self->pids[i];
+ if (pid < 0)
+ continue;
+ if (kill(pid, SIGTERM))
+ err = true;
+ if (pid != waitpid(pid, &status, 0))
+ err = true;
+ if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGTERM)
+ err = true;
+ }
+
+ return !err;
+}
+
int migrate(uint64_t *ptr, int n1, int n2)
{
int ret, tmp;
@@ -127,20 +168,17 @@ TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME)
uint64_t *ptr;
int i;
- if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
- SKIP(return, "Not enough threads or NUMA nodes available");
-
ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
memset(ptr, 0xde, TWOMEG);
- for (i = 0; i < self->nthreads - 1; i++)
+ for (i = 0; i < self->nthreads; i++)
if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
perror("Couldn't create thread");
ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
- for (i = 0; i < self->nthreads - 1; i++)
+ for (i = 0; i < self->nthreads; i++)
ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
}
@@ -151,17 +189,14 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
{
pid_t pid;
uint64_t *ptr;
- int i;
-
- if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
- SKIP(return, "Not enough threads or NUMA nodes available");
+ int i, err;
ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
memset(ptr, 0xde, TWOMEG);
- for (i = 0; i < self->nthreads - 1; i++) {
+ for (i = 0; i < self->nthreads; i++) {
pid = fork();
if (!pid) {
prctl(PR_SET_PDEATHSIG, SIGHUP);
@@ -174,9 +209,9 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
}
}
- ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
- for (i = 0; i < self->nthreads - 1; i++)
- ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+ err = migrate(ptr, self->n1, self->n2);
+ ASSERT_EQ(kill_children(self), true);
+ ASSERT_EQ(err, 0);
}
/*
@@ -184,28 +219,30 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
*/
TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
{
+ uint64_t pmdsize;
uint64_t *ptr;
int i;
if (!thp_is_enabled())
SKIP(return, "Transparent Hugepages not available");
- if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
- SKIP(return, "Not enough threads or NUMA nodes available");
+ pmdsize = read_pmd_pagesize();
+ if (!pmdsize)
+ SKIP(return, "Reading PMD pagesize failed");
- ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE,
+ ptr = mmap(NULL, 2 * pmdsize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
- ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
- ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
- memset(ptr, 0xde, TWOMEG);
- for (i = 0; i < self->nthreads - 1; i++)
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, pmdsize);
+ ASSERT_EQ(madvise(ptr, pmdsize, MADV_HUGEPAGE), 0);
+ memset(ptr, 0xde, pmdsize);
+ for (i = 0; i < self->nthreads; i++)
if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
perror("Couldn't create thread");
ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
- for (i = 0; i < self->nthreads - 1; i++)
+ for (i = 0; i < self->nthreads; i++)
ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
}
@@ -215,25 +252,27 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME)
{
+ uint64_t pmdsize;
pid_t pid;
uint64_t *ptr;
- int i;
+ int i, err;
if (!thp_is_enabled())
SKIP(return, "Transparent Hugepages not available");
- if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
- SKIP(return, "Not enough threads or NUMA nodes available");
+ pmdsize = read_pmd_pagesize();
+ if (!pmdsize)
+ SKIP(return, "Reading PMD pagesize failed");
- ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE,
+ ptr = mmap(NULL, 2 * pmdsize, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
- ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
- ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, pmdsize);
+ ASSERT_EQ(madvise(ptr, pmdsize, MADV_HUGEPAGE), 0);
- memset(ptr, 0xde, TWOMEG);
- for (i = 0; i < self->nthreads - 1; i++) {
+ memset(ptr, 0xde, pmdsize);
+ for (i = 0; i < self->nthreads; i++) {
pid = fork();
if (!pid) {
prctl(PR_SET_PDEATHSIG, SIGHUP);
@@ -246,9 +285,9 @@ TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME)
}
}
- ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
- for (i = 0; i < self->nthreads - 1; i++)
- ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+ err = migrate(ptr, self->n1, self->n2);
+ ASSERT_EQ(kill_children(self), true);
+ ASSERT_EQ(err, 0);
}
/*
@@ -256,23 +295,28 @@ TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME)
*/
TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME)
{
+ unsigned long hugepage_size;
uint64_t *ptr;
int i;
- if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
- SKIP(return, "Not enough threads or NUMA nodes available");
+ hugepage_size = default_huge_page_size();
+ if (!hugepage_size)
+ SKIP(return, "Reading HugeTLB pagesize failed");
- ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ if (hugetlb_free_default_pages() < 1)
+ SKIP(return, "Not enough huge pages");
+
+ ptr = mmap(NULL, hugepage_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
- memset(ptr, 0xde, TWOMEG);
- for (i = 0; i < self->nthreads - 1; i++)
+ memset(ptr, 0xde, hugepage_size);
+ for (i = 0; i < self->nthreads; i++)
if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
perror("Couldn't create thread");
ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
- for (i = 0; i < self->nthreads - 1; i++)
+ for (i = 0; i < self->nthreads; i++)
ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
}
@@ -281,19 +325,24 @@ TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME)
*/
TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME)
{
+ unsigned long hugepage_size;
pid_t pid;
uint64_t *ptr;
- int i;
+ int i, err;
- if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
- SKIP(return, "Not enough threads or NUMA nodes available");
+ hugepage_size = default_huge_page_size();
+ if (!hugepage_size)
+ SKIP(return, "Reading HugeTLB pagesize failed");
- ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ if (hugetlb_free_default_pages() < 1)
+ SKIP(return, "Not enough huge pages");
+
+ ptr = mmap(NULL, hugepage_size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
- memset(ptr, 0xde, TWOMEG);
- for (i = 0; i < self->nthreads - 1; i++) {
+ memset(ptr, 0xde, hugepage_size);
+ for (i = 0; i < self->nthreads; i++) {
pid = fork();
if (!pid) {
prctl(PR_SET_PDEATHSIG, SIGHUP);
@@ -306,9 +355,9 @@ TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME)
}
}
- ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
- for (i = 0; i < self->nthreads - 1; i++)
- ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+ err = migrate(ptr, self->n1, self->n2);
+ ASSERT_EQ(kill_children(self), true);
+ ASSERT_EQ(err, 0);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index b474f2b20def..e16e288cc7c1 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <sys/mman.h>
+#include <linux/mman.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
@@ -163,14 +164,17 @@ static int lock_check(unsigned long addr)
return (vma_rss == vma_size);
}
-static int unlock_lock_check(char *map)
+static int unlock_lock_check(char *map, bool mlock_supported)
{
- if (is_vmflag_set((unsigned long)map, LOCKED)) {
+ if (!is_vmflag_set((unsigned long)map, LOCKED))
+ return 0;
+
+ if (mlock_supported)
ksft_print_msg("VMA flag %s is present on page 1 after unlock\n", LOCKED);
- return 1;
- }
+ else
+ ksft_print_msg("VMA flag %s is present on an unsupported VMA\n", LOCKED);
- return 0;
+ return 1;
}
static void test_mlock_lock(void)
@@ -196,7 +200,7 @@ static void test_mlock_lock(void)
ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__);
+ ksft_test_result(!unlock_lock_check(map, true), "%s: Unlocked\n", __func__);
munmap(map, 2 * page_size);
}
@@ -296,7 +300,7 @@ static void test_munlockall0(void)
ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
}
- ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
+ ksft_test_result(!unlock_lock_check(map, true), "%s: No locked memory\n", __func__);
munmap(map, 2 * page_size);
}
@@ -336,7 +340,67 @@ static void test_munlockall1(void)
ksft_exit_fail_msg("munlockall() %s\n", strerror(errno));
}
- ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
+ ksft_test_result(!unlock_lock_check(map, true), "%s: No locked memory\n", __func__);
+ munmap(map, 2 * page_size);
+}
+
+/* Droppable memory should not be lockable. */
+static void test_mlock_droppable(void)
+{
+ char *map;
+ unsigned long page_size = getpagesize();
+
+ /* Ensure MCL_FUTURE is not set. */
+ if (munlockall()) {
+ ksft_test_result_fail("munlockall() %s\n", strerror(errno));
+ return;
+ }
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0);
+ if (map == MAP_FAILED) {
+ if ((errno == EOPNOTSUPP) || (errno == EINVAL))
+ ksft_test_result_skip("%s: MAP_DROPPABLE not supported\n", __func__);
+ else
+ ksft_test_result_fail("mmap error: %s\n", strerror(errno));
+ return;
+ }
+
+ if (mlock2_(map, 2 * page_size, 0))
+ ksft_test_result_fail("mlock2(0): %s\n", strerror(errno));
+ else
+ ksft_test_result(!unlock_lock_check(map, false),
+ "%s: droppable memory not locked\n", __func__);
+
+ munmap(map, 2 * page_size);
+}
+
+static void test_mlockall_future_droppable(void)
+{
+ char *map;
+ unsigned long page_size = getpagesize();
+
+ if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
+ ksft_test_result_fail("mlockall(MCL_CURRENT | MCL_FUTURE): %s\n", strerror(errno));
+ return;
+ }
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0);
+
+ if (map == MAP_FAILED) {
+ if ((errno == EOPNOTSUPP) || (errno == EINVAL))
+ ksft_test_result_skip("%s: MAP_DROPPABLE not supported\n", __func__);
+ else
+ ksft_test_result_fail("mmap error: %s\n", strerror(errno));
+ munlockall();
+ return;
+ }
+
+ ksft_test_result(!unlock_lock_check(map, false), "%s: droppable memory not locked\n",
+ __func__);
+
+ munlockall();
munmap(map, 2 * page_size);
}
@@ -442,7 +506,7 @@ int main(int argc, char **argv)
munmap(map, size);
- ksft_set_plan(13);
+ ksft_set_plan(15);
test_mlock_lock();
test_mlock_onfault();
@@ -451,6 +515,8 @@ int main(int argc, char **argv)
test_lock_onfault_of_present();
test_vma_management(true);
test_mlockall();
+ test_mlock_droppable();
+ test_mlockall_future_droppable();
ksft_finished();
}
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 308576437228..131d9d6db867 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -76,27 +76,6 @@ enum {
.expect_failure = should_fail \
}
-/* compute square root using binary search */
-static unsigned long get_sqrt(unsigned long val)
-{
- unsigned long low = 1;
-
- /* assuming rand_size is less than 1TB */
- unsigned long high = (1UL << 20);
-
- while (low <= high) {
- unsigned long mid = low + (high - low) / 2;
- unsigned long temp = mid * mid;
-
- if (temp == val)
- return mid;
- if (temp < val)
- low = mid + 1;
- high = mid - 1;
- }
- return low;
-}
-
/*
* Returns false if the requested remap region overlaps with an
* existing mapping (e.g text, stack) else returns true.
@@ -995,11 +974,9 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
char *rand_addr)
{
void *addr, *tmp_addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
- unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
unsigned long long threshold;
- unsigned long num_chunks;
if (threshold_mb == VALIDATION_NO_THRESHOLD)
threshold = c.region_size;
@@ -1068,87 +1045,21 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
goto clean_up_dest_preamble;
}
- /*
- * Verify byte pattern after remapping. Employ an algorithm with a
- * square root time complexity in threshold: divide the range into
- * chunks, if memcmp() returns non-zero, only then perform an
- * iteration in that chunk to find the mismatch index.
- */
- num_chunks = get_sqrt(threshold);
- for (unsigned long i = 0; i < num_chunks; ++i) {
- size_t chunk_size = threshold / num_chunks;
- unsigned long shift = i * chunk_size;
-
- if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size))
- continue;
-
- /* brute force iteration only over mismatch segment */
- for (t = shift; t < shift + chunk_size; ++t) {
- if (((char *) dest_addr)[t] != rand_addr[t]) {
- ksft_print_msg("Data after remap doesn't match at offset %llu\n",
- t);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
- ((char *) dest_addr)[t] & 0xff);
- ret = -1;
- goto clean_up_dest;
- }
- }
- }
-
- /*
- * if threshold is not divisible by num_chunks, then check the
- * last chunk
- */
- for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) {
- if (((char *) dest_addr)[t] != rand_addr[t]) {
- ksft_print_msg("Data after remap doesn't match at offset %llu\n",
- t);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
- ((char *) dest_addr)[t] & 0xff);
- ret = -1;
- goto clean_up_dest;
- }
+ /* Verify byte pattern after remapping */
+ if (memcmp(dest_addr, rand_addr, threshold)) {
+ ksft_print_msg("Data after remap doesn't match\n");
+ ret = -1;
+ goto clean_up_dest;
}
/* Verify the dest preamble byte pattern after remapping */
- if (!c.dest_preamble_size)
- goto no_preamble;
-
- num_chunks = get_sqrt(c.dest_preamble_size);
-
- for (unsigned long i = 0; i < num_chunks; ++i) {
- size_t chunk_size = c.dest_preamble_size / num_chunks;
- unsigned long shift = i * chunk_size;
-
- if (!memcmp(dest_preamble_addr + shift, rand_addr + shift,
- chunk_size))
- continue;
-
- /* brute force iteration only over mismatched segment */
- for (d = shift; d < shift + chunk_size; ++d) {
- if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
- ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
- d);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
- ((char *) dest_preamble_addr)[d] & 0xff);
- ret = -1;
- goto clean_up_dest;
- }
- }
- }
-
- for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) {
- if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
- ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
- d);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
- ((char *) dest_preamble_addr)[d] & 0xff);
- ret = -1;
- goto clean_up_dest;
- }
+ if (c.dest_preamble_size &&
+ memcmp(dest_preamble_addr, rand_addr, c.dest_preamble_size)) {
+ ksft_print_msg("Preamble data after remap doesn't match\n");
+ ret = -1;
+ goto clean_up_dest;
}
-no_preamble:
start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
ret = end_ns - start_ns;
diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c
index e806c1866e36..c8584d0fdeab 100644
--- a/tools/testing/selftests/mm/page_frag/page_frag_test.c
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -131,6 +131,8 @@ static int __init page_frag_test_init(void)
init_completion(&wait);
if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 ||
+ test_push_cpu < 0 || test_push_cpu >= nr_cpu_ids ||
+ test_pop_cpu < 0 || test_pop_cpu >= nr_cpu_ids ||
!cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu))
return -EINVAL;
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 7f9428d6062c..762306177ad8 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -7,8 +7,6 @@
#include <sys/mman.h>
#include <errno.h>
#include <malloc.h>
-#include "vm_util.h"
-#include "kselftest.h"
#include <linux/types.h>
#include <linux/memfd.h>
#include <linux/userfaultfd.h>
@@ -23,6 +21,10 @@
#include <sys/ipc.h>
#include <sys/shm.h>
+#include "vm_util.h"
+#include "kselftest.h"
+#include "hugepage_settings.h"
+
#define PAGEMAP_BITS_ALL (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
@@ -1554,6 +1556,9 @@ int main(int __attribute__((unused)) argc, char *argv[])
if (init_uffd())
ksft_exit_skip("Failed to initialize userfaultfd\n");
+ if (!hugetlb_setup_default(4))
+ ksft_print_msg("HugeTLB test will be skipped\n");
+
ksft_set_plan(117);
page_size = getpagesize();
@@ -1605,7 +1610,7 @@ int main(int __attribute__((unused)) argc, char *argv[])
}
/* 5. SHM Hugetlb page testing */
- mem_size = 2*1024*1024;
+ mem_size = default_huge_page_size();
mem = gethugetlb_mem(mem_size, &shmid);
if (mem) {
wp_init(mem, mem_size);
@@ -1633,7 +1638,7 @@ int main(int __attribute__((unused)) argc, char *argv[])
}
/* 7. File Hugetlb testing */
- mem_size = 2*1024*1024;
+ mem_size = default_huge_page_size();
fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL);
if (fd < 0)
ksft_exit_fail_msg("uffd-test creation failed %d %s\n", errno, strerror(errno));
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 7c29f075e40b..2c377f4e9df1 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -71,13 +71,14 @@ static inline void sigsafe_printf(const char *format, ...)
extern void abort_hooks(void);
#define pkey_assert(condition) do { \
if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
+ dprintf0("# assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("# errno at assert: %d\n", errno); \
+ abort_hooks(); \
+ ksft_exit_fail_msg("test %d (iteration %d)\n", \
+ test_nr, iteration_nr); \
+ } \
} while (0)
#define barrier() __asm__ __volatile__("": : :"memory")
diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c
index ca27200596a4..d8d9d1de57b8 100644
--- a/tools/testing/selftests/mm/prctl_thp_disable.c
+++ b/tools/testing/selftests/mm/prctl_thp_disable.c
@@ -14,7 +14,7 @@
#include <sys/wait.h>
#include "kselftest_harness.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#include "vm_util.h"
#ifndef PR_THP_DISABLE_EXCEPT_ADVISED
diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c
index cd4610baf5d7..3fffd5f7e6fb 100644
--- a/tools/testing/selftests/mm/process_madv.c
+++ b/tools/testing/selftests/mm/process_madv.c
@@ -310,6 +310,34 @@ TEST_F(process_madvise, invalid_vlen)
}
/*
+ * Test that invalid advice is rejected even when the iovec has zero total
+ * length. A request with valid advice and zero length is a noop, but
+ * invalid advice should still fail with EINVAL.
+ */
+TEST_F(process_madvise, invalid_advice_zero_length)
+{
+ struct iovec vec = {
+ .iov_base = NULL,
+ .iov_len = 0,
+ };
+ int pidfd = self->pidfd;
+ ssize_t ret;
+
+ errno = 0;
+ ret = sys_process_madvise(pidfd, &vec, 1, -1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ errno = 0;
+ ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = sys_process_madvise(pidfd, NULL, 0, -1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+/*
* Test process_madvise() with an invalid flag value. Currently, only a flag
* value of 0 is supported. This test is reserved for the future, e.g., if
* synchronous flags are added.
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index 2085982dba69..9a6d954ee371 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -46,6 +46,7 @@
#include <sys/ptrace.h>
#include <setjmp.h>
+#include "hugepage_settings.h"
#include "pkey-helpers.h"
int iteration_nr = 1;
@@ -61,6 +62,7 @@ noinline int read_ptr(int *ptr)
return *ptr;
}
+#if CONTROL_TRACING > 0
static void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
@@ -86,7 +88,6 @@ static void cat_into_file(char *str, char *file)
close(fd);
}
-#if CONTROL_TRACING > 0
static int warned_tracing;
static int tracing_root_ok(void)
{
@@ -136,6 +137,7 @@ static void tracing_off(void)
void abort_hooks(void)
{
+ fflush(stdout);
fprintf(stderr, "running %s()...\n", __func__);
tracing_off();
#ifdef SLEEP_ON_ABORT
@@ -370,8 +372,8 @@ static void signal_handler(int signum, siginfo_t *si, void *vucontext)
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
(si->si_code == SEGV_BNDERR)) {
- printf("non-PK si_code, exiting...\n");
- exit(4);
+ dprintf0("# non-PK si_code: %d, exiting...\n", si->si_code);
+ exit(1);
}
si_pkey_ptr = siginfo_get_pkey_ptr(si);
@@ -708,50 +710,28 @@ static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
}
static int hugetlb_setup_ok;
-#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
#define GET_NR_HUGE_PAGES 10
static void setup_hugetlbfs(void)
{
- int err;
- int fd;
- char buf[256];
- long hpagesz_kb;
- long hpagesz_mb;
+ long hpagesz_mb = HPAGE_SIZE / 1024 / 1024;
+ unsigned long free_pages;
if (geteuid() != 0) {
- fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ ksft_print_msg("WARNING: not run as root, can not do hugetlb test\n");
return;
}
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
-
/*
- * Now go make sure that we got the pages and that they
+ * Make sure that we got the pages and that they
* are PMD-level pages. Someone might have made PUD-level
* pages the default.
*/
- hpagesz_kb = HPAGE_SIZE / 1024;
- hpagesz_mb = hpagesz_kb / 1024;
- sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb);
- fd = open(buf, O_RDONLY);
- if (fd < 0) {
- fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n",
- hpagesz_mb, strerror(errno));
- return;
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- close(fd);
- if (err <= 0) {
- fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n",
- hpagesz_mb, strerror(errno));
- return;
- }
-
- if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n",
- hpagesz_mb, buf, GET_NR_HUGE_PAGES);
+ hugetlb_save_settings();
+ hugetlb_set_nr_pages(HPAGE_SIZE, GET_NR_HUGE_PAGES);
+ free_pages = hugetlb_free_pages(HPAGE_SIZE);
+ if (free_pages < GET_NR_HUGE_PAGES) {
+ ksft_print_msg("could not confirm %ldM pages, got: '%lu' expected %d\n",
+ hpagesz_mb, free_pages, GET_NR_HUGE_PAGES);
return;
}
@@ -855,7 +835,7 @@ void expected_pkey_fault(int pkey)
#define do_not_expect_pkey_fault(msg) do { \
if (last_pkey_faults != pkey_faults) \
- dprintf0("unexpected PKey fault: %s\n", msg); \
+ dprintf0("# unexpected PKey fault: %s\n", msg); \
pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
@@ -1128,7 +1108,7 @@ static void become_child(void)
/* in the child */
return;
}
- exit(0);
+ _exit(0);
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
@@ -1507,18 +1487,18 @@ static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
* checking
*/
if (__read_pkey_reg() != new_pkru)
- exit(1);
+ _exit(1);
/* Stop and allow the tracer to clear XSTATE_BV for PKRU */
raise(SIGSTOP);
if (__read_pkey_reg() != 0)
- exit(1);
+ _exit(1);
/* Stop and allow the tracer to examine PKRU */
raise(SIGSTOP);
- exit(0);
+ _exit(0);
}
pkey_assert(child == waitpid(child, &status, 0));
@@ -1692,29 +1672,36 @@ static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
pkey_assert(sret < 0);
}
-static void (*pkey_tests[])(int *ptr, u16 pkey) = {
- test_read_of_write_disabled_region,
- test_read_of_access_disabled_region,
- test_read_of_access_disabled_region_with_page_already_mapped,
- test_write_of_write_disabled_region,
- test_write_of_write_disabled_region_with_page_already_mapped,
- test_write_of_access_disabled_region,
- test_write_of_access_disabled_region_with_page_already_mapped,
- test_kernel_write_of_access_disabled_region,
- test_kernel_write_of_write_disabled_region,
- test_kernel_gup_of_access_disabled_region,
- test_kernel_gup_write_to_write_disabled_region,
- test_executing_on_unreadable_memory,
- test_implicit_mprotect_exec_only_memory,
- test_mprotect_with_pkey_0,
- test_ptrace_of_child,
- test_pkey_init_state,
- test_pkey_syscalls_on_non_allocated_pkey,
- test_pkey_syscalls_bad_args,
- test_pkey_alloc_exhaust,
- test_pkey_alloc_free_attach_pkey0,
+struct pkey_test {
+ void (*func)(int *ptr, u16 pkey);
+ const char *name;
+};
+
+#define PKEY_TEST(fn) { fn, #fn }
+
+static struct pkey_test pkey_tests[] = {
+ PKEY_TEST(test_read_of_write_disabled_region),
+ PKEY_TEST(test_read_of_access_disabled_region),
+ PKEY_TEST(test_read_of_access_disabled_region_with_page_already_mapped),
+ PKEY_TEST(test_write_of_write_disabled_region),
+ PKEY_TEST(test_write_of_write_disabled_region_with_page_already_mapped),
+ PKEY_TEST(test_write_of_access_disabled_region),
+ PKEY_TEST(test_write_of_access_disabled_region_with_page_already_mapped),
+ PKEY_TEST(test_kernel_write_of_access_disabled_region),
+ PKEY_TEST(test_kernel_write_of_write_disabled_region),
+ PKEY_TEST(test_kernel_gup_of_access_disabled_region),
+ PKEY_TEST(test_kernel_gup_write_to_write_disabled_region),
+ PKEY_TEST(test_executing_on_unreadable_memory),
+ PKEY_TEST(test_implicit_mprotect_exec_only_memory),
+ PKEY_TEST(test_mprotect_with_pkey_0),
+ PKEY_TEST(test_ptrace_of_child),
+ PKEY_TEST(test_pkey_init_state),
+ PKEY_TEST(test_pkey_syscalls_on_non_allocated_pkey),
+ PKEY_TEST(test_pkey_syscalls_bad_args),
+ PKEY_TEST(test_pkey_alloc_exhaust),
+ PKEY_TEST(test_pkey_alloc_free_attach_pkey0),
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
- test_ptrace_modifies_pkru,
+ PKEY_TEST(test_ptrace_modifies_pkru),
#endif
};
@@ -1735,7 +1722,7 @@ static void run_tests_once(void)
dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
dprintf1("test %d starting...\n", test_nr);
- pkey_tests[test_nr](ptr, pkey);
+ pkey_tests[test_nr].func(ptr, pkey);
dprintf1("freeing test memory: %p\n", ptr);
free_pkey_malloc(ptr);
sys_pkey_free(pkey);
@@ -1746,7 +1733,7 @@ static void run_tests_once(void)
tracing_off();
close_test_fds();
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+ ksft_test_result_pass("test %s (iteration %d)\n", pkey_tests[test_nr].name, iteration_nr);
dprintf1("======================\n\n");
}
iteration_nr++;
@@ -1766,27 +1753,30 @@ int main(void)
setup_handlers();
- printf("has pkeys: %d\n", pkeys_supported);
+ ksft_print_header();
if (!pkeys_supported) {
int size = PAGE_SIZE;
int *ptr;
- printf("running PKEY tests for unsupported CPU/OS\n");
+ ksft_set_plan(1);
+ ksft_print_msg("running PKEY tests for unsupported CPU/OS\n");
ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
assert(ptr != (void *)-1);
test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
- exit(0);
+ ksft_test_result_pass("pkey on unsupported CPU/OS\n");
+ ksft_finished();
}
+ ksft_set_plan(ARRAY_SIZE(pkey_tests) * nr_iterations);
+
pkey_setup_shadow();
- printf("startup pkey_reg: %016llx\n", read_pkey_reg());
+ ksft_print_msg("startup pkey_reg: %016llx\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
run_tests_once();
- printf("done (all tests OK)\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index c17b133a81d2..8c296dedf047 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -132,7 +132,7 @@ test_selected() {
run_gup_matrix() {
# -t: thp=on, -T: thp=off, -H: hugetlb=on
- local hugetlb_mb=$(( needmem_KB / 1024 ))
+ local hugetlb_mb=256
for huge in -t -T "-H -m $hugetlb_mb"; do
# -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm
@@ -154,60 +154,6 @@ run_gup_matrix() {
done
}
-# get huge pagesize and freepages from /proc/meminfo
-while read -r name size unit; do
- if [ "$name" = "HugePages_Free:" ]; then
- freepgs="$size"
- fi
- if [ "$name" = "Hugepagesize:" ]; then
- hpgsize_KB="$size"
- fi
-done < /proc/meminfo
-
-# Simple hugetlbfs tests have a hardcoded minimum requirement of
-# huge pages totaling 256MB (262144KB) in size. The userfaultfd
-# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
-# both of these requirements into account and attempt to increase
-# number of huge pages available.
-nr_cpus=$(nproc)
-uffd_min_KB=$((hpgsize_KB * nr_cpus * 2))
-hugetlb_min_KB=$((256 * 1024))
-if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then
- needmem_KB=$uffd_min_KB
-else
- needmem_KB=$hugetlb_min_KB
-fi
-
-# set proper nr_hugepages
-if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
- orig_nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
- needpgs=$((needmem_KB / hpgsize_KB))
- tries=2
- while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do
- lackpgs=$((needpgs - freepgs))
- echo 3 > /proc/sys/vm/drop_caches
- if ! echo $((lackpgs + orig_nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then
- echo "Please run this test as root"
- exit $ksft_skip
- fi
- while read -r name size unit; do
- if [ "$name" = "HugePages_Free:" ]; then
- freepgs=$size
- fi
- done < /proc/meminfo
- tries=$((tries - 1))
- done
- nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
- if [ "$freepgs" -lt "$needpgs" ]; then
- printf "Not enough huge pages available (%d < %d)\n" \
- "$freepgs" "$needpgs"
- fi
- HAVE_HUGEPAGES=1
-else
- echo "no hugetlbfs support in kernel?"
- HAVE_HUGEPAGES=0
-fi
-
# filter 64bit architectures
ARCH64STR="arm64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64"
if [ -z "$ARCH" ]; then
@@ -235,32 +181,61 @@ pretty_name() {
run_test() {
if test_selected ${CATEGORY}; then
local skip=0
+ local LOADED_HWPOISON_INJECT_MOD=0
# On memory constrainted systems some tests can fail to allocate hugepages.
# perform some cleanup before the test for a higher success rate.
if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then
- if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ mem_kb=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)
+ mem_Mb=$((mem_kb / 1024))
+
+ if (( $mem_Mb < 256 )); then
echo 3 > /proc/sys/vm/drop_caches
sleep 2
echo 1 > /proc/sys/vm/compact_memory
sleep 2
- else
- echo "hugepages not supported" | tap_prefix
- skip=1
fi
fi
+ # Ensure hwpoison_inject is available for memory-failure tests
+ if [ "${CATEGORY}" = "memory-failure" ]; then
+ # Try to load hwpoison_inject if not present.
+ HWPOISON_DIR=/sys/kernel/debug/hwpoison/
+ if [ ! -d "$HWPOISON_DIR" ]; then
+ if ! modprobe -n hwpoison_inject > /dev/null 2>&1; then
+ echo "Module hwpoison_inject not found, skipping..." \
+ | tap_prefix
+ skip=1
+ else
+ modprobe hwpoison_inject > /dev/null 2>&1
+ LOADED_HWPOISON_INJECT_MOD=1
+ if [ ! -d "$HWPOISON_DIR" ]; then
+ echo "hwpoison debugfs interface not present" \
+ | tap_prefix
+ skip=1
+ fi
+ fi
+ fi
+
+ fi
+
local test=$(pretty_name "$*")
local title="running $*"
local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix
- if [ "${skip}" != "1" ]; then
+ if [ $skip -eq 1 ]; then
+ local ret=$ksft_skip
+ else
("$@" 2>&1) | tap_prefix
local ret=${PIPESTATUS[0]}
- else
- local ret=$ksft_skip
fi
+
+ # Unload hwpoison_inject if we loaded it
+ if [ "${LOADED_HWPOISON_INJECT_MOD}" = "1" ]; then
+ modprobe -r hwpoison_inject > /dev/null 2>&1
+ fi
+
count_total=$(( count_total + 1 ))
if [ $ret -eq 0 ]; then
count_pass=$(( count_pass + 1 ))
@@ -270,7 +245,9 @@ run_test() {
count_skip=$(( count_skip + 1 ))
echo "[SKIP]" | tap_prefix
echo "ok ${count_total} ${test} # SKIP" | tap_output
- exitcode=$ksft_skip
+ if [ $exitcode -eq 0 ]; then
+ exitcode=$ksft_skip
+ fi
else
count_fail=$(( count_fail + 1 ))
echo "[FAIL]" | tap_prefix
@@ -282,31 +259,14 @@ run_test() {
echo "TAP version 13" | tap_output
-CATEGORY="hugetlb" run_test ./hugepage-mmap
-
-shmmax=$(cat /proc/sys/kernel/shmmax)
-shmall=$(cat /proc/sys/kernel/shmall)
-echo 268435456 > /proc/sys/kernel/shmmax
-echo 4194304 > /proc/sys/kernel/shmall
-CATEGORY="hugetlb" run_test ./hugepage-shm
-echo "$shmmax" > /proc/sys/kernel/shmmax
-echo "$shmall" > /proc/sys/kernel/shmall
-
-CATEGORY="hugetlb" run_test ./map_hugetlb
-CATEGORY="hugetlb" run_test ./hugepage-mremap
-CATEGORY="hugetlb" run_test ./hugepage-vmemmap
+CATEGORY="hugetlb" run_test ./hugetlb-mmap
+CATEGORY="hugetlb" run_test ./hugetlb-shm
+CATEGORY="hugetlb" run_test ./hugetlb-mremap
+CATEGORY="hugetlb" run_test ./hugetlb-vmemmap
CATEGORY="hugetlb" run_test ./hugetlb-madvise
CATEGORY="hugetlb" run_test ./hugetlb_dio
-
-if [ "${HAVE_HUGEPAGES}" = "1" ]; then
- nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
- # For this test, we need one and just one huge page
- echo 1 > /proc/sys/vm/nr_hugepages
- CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
- CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
- # Restore the previous number of huge pages, since further tests rely on it
- echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
-fi
+CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
if test_selected "hugetlb"; then
echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix
@@ -333,44 +293,11 @@ CATEGORY="gup_test" run_test ./gup_longterm
CATEGORY="userfaultfd" run_test ./uffd-unit-tests
uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
-# Hugetlb tests require source and destination huge pages. Pass in almost half
-# the size of the free pages we have, which is used for *each*. An adjustment
-# of (nr_parallel - 1) is done (see nr_parallel in uffd-stress.c) to have some
-# extra hugepages - this is done to prevent the test from failing by racily
-# reserving more hugepages than strictly required.
-# uffd-stress expects a region expressed in MiB, so we adjust
-# half_ufd_size_MB accordingly.
-adjustment=$(( (31 < (nr_cpus - 1)) ? 31 : (nr_cpus - 1) ))
-half_ufd_size_MB=$((((freepgs - adjustment) * hpgsize_KB) / 1024 / 2))
-CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
-CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb 128 32
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private 128 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16
-# uffd-wp-mremap requires at least one page of each size.
-have_all_size_hugepgs=true
-declare -A nr_size_hugepgs
-for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do
- old=$(cat $f)
- nr_size_hugepgs["$f"]="$old"
- if [ "$old" == 0 ]; then
- echo 1 > "$f"
- fi
- if [ $(cat "$f") == 0 ]; then
- have_all_size_hugepgs=false
- break
- fi
-done
-if $have_all_size_hugepgs; then
- CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
-else
- echo "# SKIP ./uffd-wp-mremap"
-fi
-
-#cleanup
-for f in "${!nr_size_hugepgs[@]}"; do
- echo "${nr_size_hugepgs["$f"]}" > "$f"
-done
-echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
+CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
CATEGORY="compaction" run_test ./compaction_test
@@ -382,6 +309,7 @@ else
fi
CATEGORY="mmap" run_test ./map_populate
+CATEGORY="mmap" run_test ./droppable
CATEGORY="mlock" run_test ./mlock-random-test
@@ -394,12 +322,10 @@ CATEGORY="mremap" run_test ./mremap_test
CATEGORY="hugetlb" run_test ./thuge-gen
CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2
CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2
+
if $RUN_DESTRUCTIVE; then
-nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
enable_soft_offline=$(cat /proc/sys/vm/enable_soft_offline)
-echo 8 > /proc/sys/vm/nr_hugepages
CATEGORY="hugetlb" run_test ./hugetlb-soft-offline
-echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
echo "$enable_soft_offline" > /proc/sys/vm/enable_soft_offline
CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison
fi
@@ -436,9 +362,7 @@ CATEGORY="memfd_secret" run_test ./memfd_secret
fi
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
-if [ "${HAVE_HUGEPAGES}" = "1" ]; then
- CATEGORY="ksm" run_test ./ksm_tests -H -s 100
-fi
+CATEGORY="ksm" run_test ./ksm_tests -H -s 100
# KSM KSM_MERGE_TIME test with size of 100
CATEGORY="ksm" run_test ./ksm_tests -P -s 100
# KSM MADV_MERGEABLE test with 10 identical pages
@@ -457,7 +381,6 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
CATEGORY="ksm" run_test ./ksm_functional_tests
# protection_keys tests
-nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
if [ -x ./protection_keys_32 ]
then
CATEGORY="pkey" run_test ./protection_keys_32
@@ -467,7 +390,6 @@ if [ -x ./protection_keys_64 ]
then
CATEGORY="pkey" run_test ./protection_keys_64
fi
-echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
if [ -x ./soft-dirty ]
then
@@ -489,24 +411,28 @@ CATEGORY="thp" run_test ./khugepaged all:shmem
CATEGORY="thp" run_test ./khugepaged -s 4 all:shmem
-CATEGORY="thp" run_test ./transhuge-stress -d 20
-
# Try to create XFS if not provided
if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
- if [ "${HAVE_HUGEPAGES}" = "1" ]; then
- if test_selected "thp"; then
- if grep xfs /proc/filesystems &>/dev/null; then
- XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
- SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
- truncate -s 314572800 ${XFS_IMG}
- mkfs.xfs -q ${XFS_IMG}
- mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
- MOUNTED_XFS=1
- fi
+ if test_selected "thp"; then
+ if grep xfs /proc/filesystems &>/dev/null; then
+ XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+ SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
+ truncate -s 314572800 ${XFS_IMG}
+ mkfs.xfs -q ${XFS_IMG}
+ mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ MOUNTED_XFS=1
fi
fi
fi
+if [ -n "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
+CATEGORY="thp" run_test ./khugepaged all:file ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+elif test_selected thp; then
+ count_total=$(( count_total + 1 ))
+ count_skip=$(( count_skip + 1 ))
+ echo "[SKIP] ./khugepaged all:file" | tap_prefix
+fi
+
CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
if [ -n "${MOUNTED_XFS}" ]; then
@@ -515,6 +441,8 @@ if [ -n "${MOUNTED_XFS}" ]; then
rm -f ${XFS_IMG}
fi
+CATEGORY="thp" run_test ./transhuge-stress -d 20
+
CATEGORY="thp" run_test ./folio_split_race_test
CATEGORY="migration" run_test ./migration
@@ -531,28 +459,7 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
CATEGORY="rmap" run_test ./rmap
-# Try to load hwpoison_inject if not present.
-HWPOISON_DIR=/sys/kernel/debug/hwpoison/
-if [ ! -d "$HWPOISON_DIR" ]; then
- if ! modprobe -q -R hwpoison_inject; then
- echo "Module hwpoison_inject not found, skipping..."
- else
- modprobe hwpoison_inject > /dev/null 2>&1
- LOADED_MOD=1
- fi
-fi
-
-if [ -d "$HWPOISON_DIR" ]; then
- CATEGORY="memory-failure" run_test ./memory-failure
-fi
-
-if [ -n "${LOADED_MOD}" ]; then
- modprobe -r hwpoison_inject > /dev/null 2>&1
-fi
-
-if [ "${HAVE_HUGEPAGES}" = 1 ]; then
- echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
-fi
+CATEGORY="memory-failure" run_test ./memory-failure
echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
echo "1..${count_total}" | tap_output
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index bcfcac99b436..fb1864a68e1c 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -9,7 +9,7 @@
#include "kselftest.h"
#include "vm_util.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#define PAGEMAP_FILE_PATH "/proc/self/pagemap"
#define TEST_ITERATIONS 10000
@@ -143,7 +143,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
if (anon) {
map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (!map)
+ if (map == MAP_FAILED)
ksft_exit_fail_msg("anon mmap failed\n");
} else {
test_fd = open(fname, O_RDWR | O_CREAT, 0664);
@@ -155,7 +155,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
ftruncate(test_fd, pagesize);
map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
MAP_SHARED, test_fd, 0);
- if (!map)
+ if (map == MAP_FAILED)
ksft_exit_fail_msg("file mmap failed\n");
}
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 500d07c4938b..32b991472f74 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -21,7 +21,7 @@
#include <time.h>
#include "vm_util.h"
#include "kselftest.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
uint64_t pagesize;
unsigned int pageshift;
@@ -470,13 +470,18 @@ static void split_file_backed_thp(int order)
char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
const char *tmpfs_loc = mkdtemp(tmpfs_template);
char testfile[INPUT_MAX];
+ unsigned long size = 2 * pmd_pagesize;
+ char opts[64];
ssize_t num_written, num_read;
- char *file_buf1, *file_buf2;
+ char *file_buf1 = NULL, *file_buf2 = NULL;
uint64_t pgoff_start = 0, pgoff_end = 1024;
int i;
ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
+ if (!tmpfs_loc)
+ ksft_exit_fail_msg("mkdtemp failed\n");
+
file_buf1 = (char *)malloc(pmd_pagesize);
file_buf2 = (char *)malloc(pmd_pagesize);
@@ -489,10 +494,13 @@ static void split_file_backed_thp(int order)
file_buf1[i] = (char)i;
memset(file_buf2, 0, pmd_pagesize);
- status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
+ snprintf(opts, sizeof(opts), "huge=always,size=%lu", size);
+ status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, opts);
- if (status)
- ksft_exit_fail_msg("Unable to create a tmpfs for testing\n");
+ if (status) {
+ ksft_print_msg("Unable to create a tmpfs for testing\n");
+ goto out;
+ }
status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
if (status >= INPUT_MAX) {
@@ -544,10 +552,13 @@ static void split_file_backed_thp(int order)
status = umount(tmpfs_loc);
if (status) {
- rmdir(tmpfs_loc);
- ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc);
+ ksft_print_msg("Unable to umount %s\n", tmpfs_loc);
+ goto out;
}
+ free(file_buf1);
+ free(file_buf2);
+
status = rmdir(tmpfs_loc);
if (status)
ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
@@ -560,8 +571,10 @@ close_file:
close(fd);
cleanup:
umount(tmpfs_loc);
- rmdir(tmpfs_loc);
out:
+ free(file_buf1);
+ free(file_buf2);
+ rmdir(tmpfs_loc);
ksft_exit_fail_msg("Error occurred\n");
}
@@ -609,9 +622,13 @@ static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
assert(fd_size % sizeof(buf) == 0);
for (i = 0; i < sizeof(buf); i++)
buf[i] = (unsigned char)i;
- for (i = 0; i < fd_size; i += sizeof(buf))
- write(*fd, buf, sizeof(buf));
-
+ for (i = 0; i < fd_size; i += sizeof(buf)) {
+ if (write(*fd, buf, sizeof(buf)) != sizeof(buf)) {
+ ksft_perror("write testfile");
+ close(*fd);
+ goto err_out_unlink;
+ }
+ }
close(*fd);
sync();
*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
@@ -621,7 +638,7 @@ static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
}
if (write(*fd, "3", 1) != 1) {
ksft_perror("write to drop_caches");
- goto err_out_unlink;
+ goto err_out_close;
}
close(*fd);
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 77813d34dcc2..22b9c2f1c35d 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -1,17 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Test selecting other page sizes for mmap/shmget.
-
- Before running this huge pages for each huge page size must have been
- reserved.
- For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must
- be used. 1GB wouldn't be tested if it isn't available.
- Also shmmax must be increased.
- And you need to run as root to work around some weird permissions in shm.
- And nothing using huge pages should run in parallel.
- When the program aborts you may need to clean up the shm segments with
- ipcrm -m by hand, like this
- sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
- (warning this will remove all if someone else uses them) */
+/* Test selecting other page sizes for mmap/shmget. */
#define _GNU_SOURCE
#include <sys/mman.h>
@@ -21,13 +9,12 @@
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/stat.h>
-#include <glob.h>
-#include <assert.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include "vm_util.h"
#include "kselftest.h"
+#include "hugepage_settings.h"
#if !defined(MAP_HUGETLB)
#define MAP_HUGETLB 0x40000
@@ -37,15 +24,6 @@
#ifndef SHM_HUGE_SHIFT
#define SHM_HUGE_SHIFT 26
#endif
-#ifndef SHM_HUGE_MASK
-#define SHM_HUGE_MASK 0x3f
-#endif
-#ifndef SHM_HUGE_2MB
-#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
-#endif
-#ifndef SHM_HUGE_1GB
-#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
-#endif
#define NUM_PAGESIZES 5
#define NUM_PAGES 4
@@ -63,32 +41,10 @@ int ilog2(unsigned long v)
void show(unsigned long ps)
{
- char buf[100];
-
if (ps == getpagesize())
return;
- ksft_print_msg("%luMB: ", ps >> 20);
-
- fflush(stdout);
- snprintf(buf, sizeof buf,
- "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
- ps >> 10);
- system(buf);
-}
-
-unsigned long read_free(unsigned long ps)
-{
- unsigned long val = 0;
- char buf[100];
-
- snprintf(buf, sizeof(buf),
- "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
- ps >> 10);
- if (read_sysfs(buf, &val) && ps != getpagesize())
- ksft_print_msg("missing %s\n", buf);
-
- return val;
+ ksft_print_msg("%luMB: %lu\n", ps >> 20, hugetlb_free_pages(ps));
}
void test_mmap(unsigned long size, unsigned flags)
@@ -96,14 +52,14 @@ void test_mmap(unsigned long size, unsigned flags)
char *map;
unsigned long before, after;
- before = read_free(size);
+ before = hugetlb_free_pages(size);
map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
if (map == MAP_FAILED)
ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
memset(map, 0xff, size*NUM_PAGES);
- after = read_free(size);
+ after = hugetlb_free_pages(size);
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
@@ -120,7 +76,7 @@ void test_shmget(unsigned long size, unsigned flags)
struct shm_info i;
char *map;
- before = read_free(size);
+ before = hugetlb_free_pages(size);
id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
if (id < 0) {
if (errno == EPERM) {
@@ -141,7 +97,7 @@ void test_shmget(unsigned long size, unsigned flags)
shmctl(id, IPC_RMID, NULL);
memset(map, 0xff, size*NUM_PAGES);
- after = read_free(size);
+ after = hugetlb_free_pages(size);
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
@@ -153,43 +109,15 @@ void test_shmget(unsigned long size, unsigned flags)
void find_pagesizes(void)
{
unsigned long largest = getpagesize();
- unsigned long shmmax_val = 0;
int i;
- glob_t g;
- glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
- assert(g.gl_pathc <= NUM_PAGESIZES);
- for (i = 0; (i < g.gl_pathc) && (num_page_sizes < NUM_PAGESIZES); i++) {
- sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
- &page_sizes[num_page_sizes]);
- page_sizes[num_page_sizes] <<= 10;
- ksft_print_msg("Found %luMB\n", page_sizes[i] >> 20);
+ num_page_sizes = hugetlb_setup(NUM_PAGES, page_sizes, ARRAY_SIZE(page_sizes));
- if (page_sizes[num_page_sizes] > largest)
+ for (i = 0; i < num_page_sizes; i++)
+ if (page_sizes[i] > largest)
largest = page_sizes[i];
- if (read_free(page_sizes[num_page_sizes]) >= NUM_PAGES)
- num_page_sizes++;
- else
- ksft_print_msg("SKIP for size %lu MB as not enough huge pages, need %u\n",
- page_sizes[num_page_sizes] >> 20, NUM_PAGES);
- }
- globfree(&g);
-
- read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val);
- if (shmmax_val < NUM_PAGES * largest) {
- ksft_print_msg("WARNING: shmmax is too small to run this test.\n");
- ksft_print_msg("Please run the following command to increase shmmax:\n");
- ksft_print_msg("echo %lu > /proc/sys/kernel/shmmax\n", largest * NUM_PAGES);
- ksft_exit_skip("Test skipped due to insufficient shmmax value.\n");
- }
-
-#if defined(__x86_64__)
- if (largest != 1U<<30) {
- ksft_exit_skip("No GB pages available on x86-64\n"
- "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
- }
-#endif
+ shm_limits_prepare(NUM_PAGES * largest);
}
int main(void)
@@ -232,3 +160,5 @@ int main(void)
ksft_finished();
}
+
+SHM_LIMITS_RESTORE()
diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c
index 7a9f1035099b..8eb0c5630e7e 100644
--- a/tools/testing/selftests/mm/transhuge-stress.c
+++ b/tools/testing/selftests/mm/transhuge-stress.c
@@ -17,7 +17,7 @@
#include <sys/mman.h>
#include "vm_util.h"
#include "kselftest.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
int backing_fd = -1;
int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index 844a85ab31eb..92a21b97f745 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -37,24 +37,24 @@
#include "kselftest.h"
#include "vm_util.h"
+#include "hugepage_settings.h"
#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
-#define _err(fmt, ...) \
- do { \
- int ret = errno; \
- fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
- fprintf(stderr, " (errno=%d, @%s:%d)\n", \
- ret, __FILE__, __LINE__); \
+#define _err(fmt, ...) \
+ do { \
+ int ret = errno; \
+ ksft_print_msg("ERROR: " fmt " (errno=%d, @%s:%d)\n", \
+ ##__VA_ARGS__, ret, __FILE__, __LINE__); \
} while (0)
-#define errexit(exitcode, fmt, ...) \
+#define errexit(fmt, ...) \
do { \
_err(fmt, ##__VA_ARGS__); \
- exit(exitcode); \
+ ksft_exit_fail(); \
} while (0)
-#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
+#define err(fmt, ...) errexit(fmt, ##__VA_ARGS__)
struct uffd_global_test_opts {
unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size;
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index 700fbaa18d44..3401dd6028f0 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -286,18 +286,12 @@ static int userfaultfd_stress(uffd_global_test_opts_t *gopts)
pthread_attr_setstacksize(&attr, 16*1024*1024);
while (bounces--) {
- printf("bounces: %d, mode:", bounces);
- if (bounces & BOUNCE_RANDOM)
- printf(" rnd");
- if (bounces & BOUNCE_RACINGFAULTS)
- printf(" racing");
- if (bounces & BOUNCE_VERIFY)
- printf(" ver");
- if (bounces & BOUNCE_POLL)
- printf(" poll");
- else
- printf(" read");
- printf(", ");
+ ksft_print_msg("bounces: %d, mode:%s%s%s%s, ",
+ bounces,
+ bounces & BOUNCE_RANDOM ? " rnd" : "",
+ bounces & BOUNCE_RACINGFAULTS ? " racing" : "",
+ bounces & BOUNCE_VERIFY ? " ver" : "",
+ bounces & BOUNCE_POLL ? " poll" : " read");
fflush(stdout);
if (bounces & BOUNCE_POLL)
@@ -461,6 +455,9 @@ int main(int argc, char **argv)
if (argc < 4)
usage();
+ ksft_print_header();
+ ksft_set_plan(1);
+
if (signal(SIGALRM, sigalrm) == SIG_ERR)
err("failed to arm SIGALRM");
alarm(ALARM_INTERVAL_SECS);
@@ -483,17 +480,17 @@ int main(int argc, char **argv)
* Ensure nr_parallel - 1 hugepages on top of that to account
* for racy extra reservation of hugepages.
*/
- if (gopts->test_type == TEST_HUGETLB &&
- get_free_hugepages() < 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1) {
- printf("skip: Skipping userfaultfd... not enough hugepages\n");
- return KSFT_SKIP;
+ if (gopts->test_type == TEST_HUGETLB) {
+ unsigned long nr = 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1;
+
+ if (!hugetlb_setup_default(nr))
+ ksft_exit_skip("Skipping userfaultfd... not enough hugepages\n");
}
gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel;
if (!gopts->nr_pages_per_cpu) {
- _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)",
- bytes, gopts->page_size, gopts->nr_parallel);
- usage();
+ ksft_exit_skip("pages_per_cpu = 0, cannot test (%zu / %lu / %lu)\n",
+ bytes, gopts->page_size, gopts->nr_parallel);
}
bounces = atoi(argv[3]);
@@ -503,9 +500,12 @@ int main(int argc, char **argv)
}
gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel;
- printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
- gopts->nr_pages, gopts->nr_pages_per_cpu);
- return userfaultfd_stress(gopts);
+ ksft_print_msg("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ gopts->nr_pages, gopts->nr_pages_per_cpu);
+
+ ksft_test_result(!userfaultfd_stress(gopts),
+ "uffd-stress %s\n", argv[1]);
+ ksft_finished();
}
#else /* __NR_userfaultfd */
@@ -514,8 +514,8 @@ int main(int argc, char **argv)
int main(void)
{
- printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
- return KSFT_SKIP;
+ ksft_print_header();
+ ksft_exit_skip("missing __NR_userfaultfd definition\n");
}
#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 6f5e404a446c..a6c14109e818 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -86,47 +86,28 @@ typedef struct {
uffd_test_case_ops_t *test_case_ops;
} uffd_test_case_t;
-static void uffd_test_report(void)
-{
- printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
- ksft_get_pass_cnt(),
- ksft_get_xskip_cnt(),
- ksft_get_fail_cnt(),
- ksft_test_num());
-}
+static char current_test[256];
static void uffd_test_pass(void)
{
- printf("done\n");
- ksft_inc_pass_cnt();
+ ksft_test_result_pass("%s\n", current_test);
}
#define uffd_test_start(...) do { \
- printf("Testing "); \
- printf(__VA_ARGS__); \
- printf("... "); \
- fflush(stdout); \
+ snprintf(current_test, sizeof(current_test), __VA_ARGS__); \
} while (0)
-#define uffd_test_fail(...) do { \
- printf("failed [reason: "); \
- printf(__VA_ARGS__); \
- printf("]\n"); \
- ksft_inc_fail_cnt(); \
+#define uffd_test_fail(fmt, ...) do { \
+ ksft_print_msg("failed reason: [" fmt "]\n", ##__VA_ARGS__); \
+ ksft_test_result_fail("%s\n", current_test); \
} while (0)
static void uffd_test_skip(const char *message)
{
- printf("skipped [reason: %s]\n", message);
- ksft_inc_xskip_cnt();
+ ksft_test_result_skip("%s (%s)\n", current_test, message);
}
-/*
- * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll
- * return 1 even if some test failed as long as uffd supported, because in
- * that case we still want to proceed with the rest uffd unit tests.
- */
-static int test_uffd_api(bool use_dev)
+static void test_uffd_api(bool use_dev)
{
struct uffdio_api uffdio_api;
int uffd;
@@ -140,7 +121,7 @@ static int test_uffd_api(bool use_dev)
uffd = uffd_open_sys(UFFD_FLAGS);
if (uffd < 0) {
uffd_test_skip("cannot open userfaultfd handle");
- return 0;
+ return;
}
/* Test wrong UFFD_API */
@@ -177,8 +158,6 @@ static int test_uffd_api(bool use_dev)
uffd_test_pass();
out:
close(uffd);
- /* We have a valid uffd handle */
- return 1;
}
@@ -320,7 +299,7 @@ static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bo
if (test_pin)
unpin_pages(&args);
/* Succeed */
- exit(0);
+ _exit(0);
}
waitpid(child, &result, 0);
@@ -788,7 +767,7 @@ static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp)
err("fork");
if (!pid)
- exit(faulting_process(gopts, 2, wp));
+ _exit(faulting_process(gopts, 2, wp));
waitpid(pid, &err, 0);
if (err)
@@ -842,7 +821,7 @@ static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp)
err("fork");
if (!pid)
- exit(faulting_process(gopts, 0, wp));
+ _exit(faulting_process(gopts, 0, wp));
waitpid(pid, &err, 0);
if (err)
@@ -1701,18 +1680,58 @@ static void usage(const char *prog)
exit(KSFT_FAIL);
}
+static int uffd_count_tests(int n_tests, int n_mems, const char *test_filter)
+{
+ uffd_test_case_t *test;
+ int i, j, count = 0;
+
+ if (!test_filter)
+ count += 2; /* test_uffd_api(false) + test_uffd_api(true) */
+
+ for (i = 0; i < n_tests; i++) {
+ test = &uffd_tests[i];
+ if (test_filter && !strstr(test->name, test_filter))
+ continue;
+ for (j = 0; j < n_mems; j++)
+ if (test->mem_targets & mem_types[j].mem_flag)
+ count++;
+ }
+
+ return count;
+}
+
+static unsigned long uffd_setup_hugetlb(void)
+{
+ unsigned long nr_hugepages, hp_size;
+
+ hugetlb_save_settings();
+ hp_size = default_huge_page_size();
+
+ if (!hp_size)
+ return 0;
+
+ /* need twice UFFD_TEST_MEM_SIZE, one for src area and one for dst */
+ nr_hugepages = 2 * MAX(UFFD_TEST_MEM_SIZE, hp_size * 2) / hp_size;
+ hugetlb_set_nr_default_pages(nr_hugepages);
+
+ if (hugetlb_free_default_pages() < nr_hugepages)
+ return 0;
+
+ return hp_size;
+}
+
int main(int argc, char *argv[])
{
int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
const char *test_filter = NULL;
+ unsigned long hugepage_size;
bool list_only = false;
uffd_test_case_t *test;
mem_type_t *mem_type;
uffd_test_args_t args;
const char *errmsg;
- int has_uffd, opt;
- int i, j;
+ int i, j, opt;
while ((opt = getopt(argc, argv, "f:hl")) != -1) {
switch (opt) {
@@ -1730,24 +1749,30 @@ int main(int argc, char *argv[])
}
}
- if (!test_filter && !list_only) {
- has_uffd = test_uffd_api(false);
- has_uffd |= test_uffd_api(true);
-
- if (!has_uffd) {
- printf("Userfaultfd not supported or unprivileged, skip all tests\n");
- exit(KSFT_SKIP);
+ if (list_only) {
+ for (i = 0; i < n_tests; i++) {
+ test = &uffd_tests[i];
+ if (test_filter && !strstr(test->name, test_filter))
+ continue;
+ printf("%s\n", test->name);
}
+ return KSFT_PASS;
+ }
+
+ hugepage_size = uffd_setup_hugetlb();
+
+ ksft_print_header();
+ ksft_set_plan(uffd_count_tests(n_tests, n_mems, test_filter));
+
+ if (!test_filter) {
+ test_uffd_api(false);
+ test_uffd_api(true);
}
for (i = 0; i < n_tests; i++) {
test = &uffd_tests[i];
if (test_filter && !strstr(test->name, test_filter))
continue;
- if (list_only) {
- printf("%s\n", test->name);
- continue;
- }
for (j = 0; j < n_mems; j++) {
mem_type = &mem_types[j];
@@ -1758,10 +1783,14 @@ int main(int argc, char *argv[])
uffd_test_ops = mem_type->mem_ops;
uffd_test_case_ops = test->test_case_ops;
+ if (!(test->mem_targets & mem_type->mem_flag))
+ continue;
+
+ uffd_test_start("%s on %s", test->name, mem_type->name);
if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
- gopts.page_size = default_huge_page_size();
+ gopts.page_size = hugepage_size;
if (gopts.page_size == 0) {
- uffd_test_skip("huge page size is 0, feature missing?");
+ uffd_test_skip("not enough HugeTLB pages");
continue;
}
} else {
@@ -1777,10 +1806,6 @@ int main(int argc, char *argv[])
/* Initialize test arguments */
args.mem_type = mem_type;
- if (!(test->mem_targets & mem_type->mem_flag))
- continue;
-
- uffd_test_start("%s on %s", test->name, mem_type->name);
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
@@ -1794,10 +1819,7 @@ int main(int argc, char *argv[])
}
}
- if (!list_only)
- uffd_test_report();
-
- return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
+ ksft_finished();
}
#else /* __NR_userfaultfd */
@@ -1806,8 +1828,8 @@ int main(int argc, char *argv[])
int main(void)
{
- printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
- return KSFT_SKIP;
+ ksft_print_header();
+ ksft_exit_skip("missing __NR_userfaultfd definition\n");
}
#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
index 17186d4a4147..c973d6722720 100644
--- a/tools/testing/selftests/mm/uffd-wp-mremap.c
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -8,16 +8,27 @@
#include <linux/mman.h>
#include <sys/mman.h>
#include "kselftest.h"
-#include "thp_settings.h"
+#include "hugepage_settings.h"
#include "uffd-common.h"
static int pagemap_fd;
-static size_t pagesize;
static int nr_pagesizes = 1;
+static unsigned long pagesize;
static int nr_thpsizes;
static size_t thpsizes[20];
static int nr_hugetlbsizes;
-static size_t hugetlbsizes[10];
+static unsigned long hugetlbsizes[10];
+
+static void check_uffd_wp_feature_supported(void)
+{
+ uint64_t features = 0;
+
+ if (uffd_get_features(&features))
+ ksft_exit_skip("failed to get available features (%d)\n", errno);
+
+ if (!(features & UFFD_FEATURE_PAGEFAULT_FLAG_WP))
+ ksft_exit_skip("uffd-wp feature not supported\n");
+}
static int detect_thp_sizes(size_t sizes[], int max)
{
@@ -245,7 +256,7 @@ out:
}
struct testcase {
- size_t *sizes;
+ unsigned long *sizes;
int *nr_sizes;
bool private;
bool swapout;
@@ -336,14 +347,16 @@ int main(int argc, char **argv)
struct thp_settings settings;
int i, j, plan = 0;
+ hugepage_save_settings(true, true);
+
+ check_uffd_wp_feature_supported();
+
pagesize = getpagesize();
nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
- nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
- ARRAY_SIZE(hugetlbsizes));
+ nr_hugetlbsizes = hugetlb_setup(1, hugetlbsizes, ARRAY_SIZE(hugetlbsizes));
- /* If THP is supported, save THP settings and initially disable THP. */
+ /* If THP is supported, initially disable THP. */
if (nr_thpsizes) {
- thp_save_settings();
thp_read_settings(&settings);
for (i = 0; i < NR_ORDERS; i++) {
settings.hugepages[i].enabled = THP_NEVER;
@@ -368,10 +381,6 @@ int main(int argc, char **argv)
tc->swapout, tc->hugetlb);
}
- /* If THP is supported, restore original THP settings. */
- if (nr_thpsizes)
- thp_restore_settings();
-
i = ksft_get_fail_cnt();
if (i)
ksft_exit_fail_msg("%d out of %d tests failed\n",
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
index 51401e081b20..e24d7ba00b44 100644
--- a/tools/testing/selftests/mm/va_high_addr_switch.c
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -11,6 +11,7 @@
#include "vm_util.h"
#include "kselftest.h"
+#include "hugepage_settings.h"
/*
* The hint addr value is used to allocate addresses
@@ -257,40 +258,35 @@ void testcases_init(void)
switch_hint = addr_switch_hint;
}
-static int run_test(struct testcase *test, int count)
+static void run_test(struct testcase *test, int count)
{
void *p;
- int i, ret = KSFT_PASS;
+ int i;
for (i = 0; i < count; i++) {
struct testcase *t = test + i;
p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0);
-
- printf("%s: %p - ", t->msg, p);
-
if (p == MAP_FAILED) {
- printf("FAILED\n");
- ret = KSFT_FAIL;
+ ksft_perror("MAP_FAILED");
+ ksft_test_result_fail("%s\n", t->msg);
continue;
}
if (t->low_addr_required && p >= (void *)(switch_hint)) {
- printf("FAILED\n");
- ret = KSFT_FAIL;
+ ksft_print_msg("%p not below switch hint\n", p);
+ ksft_test_result_fail("%s\n", t->msg);
} else {
/*
* Do a dereference of the address returned so that we catch
* bugs in page fault handling
*/
memset(p, 0, t->size);
- printf("OK\n");
+ ksft_test_result_pass("%s\n", t->msg);
}
if (!t->keep_mapped)
munmap(p, t->size);
}
-
- return ret;
}
#ifdef __aarch64__
@@ -322,19 +318,23 @@ static int supported_arch(void)
int main(int argc, char **argv)
{
- int ret, hugetlb_ret = KSFT_PASS;
+ bool run_hugetlb = false;
+
+ ksft_print_header();
if (!supported_arch())
- return KSFT_SKIP;
+ ksft_exit_skip("Architecture not supported\n");
+
+ if (hugetlb_setup_default(6))
+ run_hugetlb = true;
testcases_init();
- ret = run_test(testcases, sz_testcases);
- if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
- hugetlb_ret = run_test(hugetlb_testcases, sz_hugetlb_testcases);
+ ksft_set_plan(sz_testcases + (run_hugetlb ? sz_hugetlb_testcases : 0));
+
+ run_test(testcases, sz_testcases);
+ if (run_hugetlb)
+ run_test(hugetlb_testcases, sz_hugetlb_testcases);
- if (ret == KSFT_PASS && hugetlb_ret == KSFT_PASS)
- return KSFT_PASS;
- else
- return KSFT_FAIL;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 9492c2d72634..01c15fe3c799 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -9,7 +9,6 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-orig_nr_hugepages=0
skip()
{
@@ -77,43 +76,5 @@ check_test_requirements()
esac
}
-save_nr_hugepages()
-{
- orig_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages)
-}
-
-restore_nr_hugepages()
-{
- echo "$orig_nr_hugepages" > /proc/sys/vm/nr_hugepages
-}
-
-setup_nr_hugepages()
-{
- local needpgs=$1
- while read -r name size unit; do
- if [ "$name" = "HugePages_Free:" ]; then
- freepgs="$size"
- break
- fi
- done < /proc/meminfo
- if [ "$freepgs" -ge "$needpgs" ]; then
- return
- fi
- local hpgs=$((orig_nr_hugepages + needpgs))
- echo $hpgs > /proc/sys/vm/nr_hugepages
-
- local nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
- if [ "$nr_hugepgs" != "$hpgs" ]; then
- restore_nr_hugepages
- skip "$0: no enough hugepages for testing"
- fi
-}
-
check_test_requirements
-save_nr_hugepages
-# The HugeTLB tests require 6 pages
-setup_nr_hugepages 6
-./va_high_addr_switch --run-hugetlb
-retcode=$?
-restore_nr_hugepages
-exit $retcode
+./va_high_addr_switch
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index db94564f4431..311fc5b4513e 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -2,7 +2,6 @@
#include <string.h>
#include <errno.h>
#include <fcntl.h>
-#include <dirent.h>
#include <inttypes.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
@@ -291,53 +290,6 @@ int64_t allocate_transhuge(void *ptr, int pagemap_fd)
return -1;
}
-unsigned long default_huge_page_size(void)
-{
- unsigned long hps = 0;
- char *line = NULL;
- size_t linelen = 0;
- FILE *f = fopen("/proc/meminfo", "r");
-
- if (!f)
- return 0;
- while (getline(&line, &linelen, f) > 0) {
- if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
- hps <<= 10;
- break;
- }
- }
-
- free(line);
- fclose(f);
- return hps;
-}
-
-int detect_hugetlb_page_sizes(size_t sizes[], int max)
-{
- DIR *dir = opendir("/sys/kernel/mm/hugepages/");
- int count = 0;
-
- if (!dir)
- return 0;
-
- while (count < max) {
- struct dirent *entry = readdir(dir);
- size_t kb;
-
- if (!entry)
- break;
- if (entry->d_type != DT_DIR)
- continue;
- if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
- continue;
- sizes[count++] = kb * 1024;
- ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
- kb);
- }
- closedir(dir);
- return count;
-}
-
int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
{
size_t count;
@@ -396,25 +348,6 @@ int uffd_unregister(int uffd, void *addr, uint64_t len)
return ret;
}
-unsigned long get_free_hugepages(void)
-{
- unsigned long fhp = 0;
- char *line = NULL;
- size_t linelen = 0;
- FILE *f = fopen("/proc/meminfo", "r");
-
- if (!f)
- return fhp;
- while (getline(&line, &linelen, f) > 0) {
- if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
- break;
- }
-
- free(line);
- fclose(f);
- return fhp;
-}
-
static bool check_vmflag(void *addr, const char *flag)
{
char buffer[MAX_LINE_LENGTH];
@@ -463,7 +396,7 @@ bool softdirty_supported(void)
/* New mappings are expected to be marked with VM_SOFTDIRTY (sd). */
addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
- if (!addr)
+ if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
supported = check_vmflag(addr, "sd");
@@ -765,6 +698,27 @@ int unpoison_memory(unsigned long pfn)
return ret > 0 ? 0 : -errno;
}
+int read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
void write_file(const char *path, const char *buf, size_t buflen)
{
int fd, saved_errno;
@@ -788,3 +742,49 @@ void write_file(const char *path, const char *buf, size_t buflen)
ksft_exit_fail_msg("%s write(%.*s) is truncated, expected %zu bytes, got %zd bytes\n",
path, (int)(buflen - 1), buf, buflen - 1, numwritten);
}
+
+unsigned long read_num(const char *path)
+{
+ char buf[21];
+
+ if (read_file(path, buf, sizeof(buf)) < 0)
+ ksft_exit_fail_perror("read_file()");
+
+ return strtoul(buf, NULL, 10);
+}
+
+void write_num(const char *path, unsigned long num)
+{
+ char buf[21];
+
+ sprintf(buf, "%lu", num);
+ write_file(path, buf, strlen(buf) + 1);
+}
+
+static unsigned long shmall, shmmax;
+
+void __shm_limits_restore(void)
+{
+ if (shmmax)
+ write_num("/proc/sys/kernel/shmmax", shmmax);
+ if (shmall)
+ write_num("/proc/sys/kernel/shmall", shmall);
+}
+
+void shm_limits_prepare(unsigned long length)
+{
+ unsigned long nr = length / psize();
+ unsigned long val;
+
+ val = read_num("/proc/sys/kernel/shmmax");
+ if (val < length) {
+ write_num("/proc/sys/kernel/shmmax", length);
+ shmmax = val;
+ }
+
+ val = read_num("/proc/sys/kernel/shmall");
+ if (val < nr) {
+ write_num("/proc/sys/kernel/shmall", nr);
+ shmall = val;
+ }
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 1a07305ceff4..ea8fc8fdf0eb 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -94,8 +94,6 @@ bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
int64_t allocate_transhuge(void *ptr, int pagemap_fd);
-unsigned long default_huge_page_size(void);
-int detect_hugetlb_page_sizes(size_t sizes[], int max);
int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags);
int uffd_register(int uffd, void *addr, uint64_t len,
@@ -103,7 +101,6 @@ int uffd_register(int uffd, void *addr, uint64_t len,
int uffd_unregister(int uffd, void *addr, uint64_t len);
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
-unsigned long get_free_hugepages(void);
bool check_vmflag_io(void *addr);
bool check_vmflag_pfnmap(void *addr);
bool check_vmflag_guard(void *addr);
@@ -168,3 +165,15 @@ int unpoison_memory(unsigned long pfn);
#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
void write_file(const char *path, const char *buf, size_t buflen);
+int read_file(const char *path, char *buf, size_t buflen);
+unsigned long read_num(const char *path);
+void write_num(const char *path, unsigned long num);
+
+void shm_limits_prepare(unsigned long length);
+void __shm_limits_restore(void);
+
+#define SHM_LIMITS_RESTORE() \
+static void __attribute__((destructor)) shm_limits_restore(void) \
+{ \
+ __shm_limits_restore(); \
+}
diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh
index 334a7eca8a80..cc571f607429 100755
--- a/tools/testing/selftests/net/broadcast_ether_dst.sh
+++ b/tools/testing/selftests/net/broadcast_ether_dst.sh
@@ -44,7 +44,7 @@ test_broadcast_ether_dst() {
# tcpdump will exit after receiving a single packet
# timeout will kill tcpdump if it is still running after 2s
timeout 2s ip netns exec "${CLIENT_NS}" \
- tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" &
+ tcpdump -i link0 -c 1 -w "${CAPFILE}" -Z root icmp &> "${OUTPUT}" &
pid=$!
slowwait 1 grep -qs "listening" "${OUTPUT}"
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
index d860f7d9744b..7261975957ef 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -2,18 +2,32 @@
# SPDX-License-Identifier: GPL-2.0
#
# Testing For SCTP COLLISION SCENARIO as Below:
-#
+# 1. Stale INIT_ACK capture:
# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+# (Delayed)
+#
+# 2. Stale INIT capture:
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# (Delayed)
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
#
# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
source lib.sh
+checktool "nft --version" "run test without nft"
+checktool "tc -h" "run test without tc"
+checktool "modprobe -q sctp" "load sctp module"
+
CLIENT_IP="198.51.200.1"
CLIENT_PORT=1234
@@ -24,7 +38,8 @@ CLIENT_GW="198.51.200.2"
SERVER_GW="198.51.100.2"
# setup the topo
-setup() {
+topo_setup() {
+ # setup_ns cleans up existing net namespaces first.
setup_ns CLIENT_NS SERVER_NS ROUTER_NS
ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
@@ -38,35 +53,53 @@ setup() {
ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+ sysctl -wq net.netfilter.nf_log_all_netns=1
ip -n "$CLIENT_NS" link set link3 up
ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+}
+
+conf_delay()
+{
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK/INIT with
+ local ns=$1
+ local link=$2
+ local chunk_type=$3
- # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
- # tc on $SERVER_NS side
- tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
- tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
- tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
- 0xff match u8 2 0xff at 32 flowid 1:1
- if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+
+ tc -n "$ns" qdisc add dev "$link" root handle 1: htb r2q 64
+ tc -n "$ns" class add dev "$link" parent 1: classid 1:1 htb rate 100mbit
+ tc -n "$ns" filter add dev "$link" parent 1: protocol ip \
+ u32 match ip protocol 132 0xff match u8 "$chunk_type" 0xff at 32 flowid 1:1
+ if ! tc -n "$ns" qdisc add dev "$link" parent 1:1 handle 10: netem delay 1200ms; then
echo "SKIP: Cannot add netem qdisc"
- exit $ksft_skip
+ return $ksft_skip
fi
# simulate the ctstate check on OVS nf_conntrack
- ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
- ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
-
- # use a smaller number for assoc's max_retrans to reproduce the issue
- modprobe -q sctp
- ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+ ip net exec "$ROUTER_NS" nft -f - <<-EOF
+ table ip t {
+ chain forward {
+ type filter hook forward priority filter; policy accept;
+ meta l4proto icmp counter accept
+ ct state new counter accept
+ ct state established,related counter accept
+ ct state invalid log flags all counter drop comment \
+ "Expect to drop stale INIT/INIT_ACK chunks"
+ counter
+ }
+ }
+ EOF
+ return 0
}
cleanup() {
- ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
- ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+ # cleanup_all_ns terminates running processes in the namespaces.
cleanup_all_ns
+ sysctl -wq net.netfilter.nf_log_all_netns=0
}
do_test() {
@@ -81,7 +114,19 @@ do_test() {
# run the test case
trap cleanup EXIT
-setup && \
-echo "Test for SCTP Collision in nf_conntrack:" && \
-do_test && echo "PASS!"
-exit $?
+
+echo "Test for SCTP INIT_ACK Collision in nf_conntrack:"
+topo_setup || exit $?
+conf_delay $SERVER_NS link0 2 || exit $?
+
+if ! do_test; then
+ exit $ksft_fail
+fi
+
+echo "Test for SCTP INIT Collision in nf_conntrack:"
+topo_setup || exit $?
+conf_delay $CLIENT_NS link3 1 || exit $?
+
+if ! do_test; then
+ exit $ksft_fail
+fi
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index 7a34ef468975..08ad07500e8a 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,7 +592,7 @@ ip -net "$nsr1" link set tun0 up
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
-ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2 encaplimit none
ip -net "$nsr1" link set tun6 up
ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
@@ -601,7 +601,7 @@ ip -net "$nsr2" link set tun0 up
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 encaplimit none || ret=1
ip -net "$nsr2" link set tun6 up
ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
@@ -651,7 +651,7 @@ ip -net "$nsr1" route change default via 192.168.200.2
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
-ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2 encaplimit none
ip -net "$nsr1" link set tun6.10 up
ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
ip -6 -net "$nsr1" route delete default
@@ -670,7 +670,7 @@ ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
ip -net "$nsr2" route change default via 192.168.200.1
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 encaplimit none || ret=1
ip -net "$nsr2" link set tun6.10 up
ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
ip -6 -net "$nsr2" route delete default
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index d80390848e85..7c857a2e0f34 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -85,11 +85,12 @@ ip -net "$ns3" route add default via 10.0.3.1
ip -net "$ns3" route add default via dead:3::1
load_ruleset() {
- local name=$1
- local prio=$2
+ local family=$1
+ local name=$2
+ local prio=$3
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
-table inet $name {
+table $family $name {
chain nfq {
ip protocol icmp queue bypass
icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
@@ -228,6 +229,7 @@ nf_queue_wait()
test_queue()
{
local expected="$1"
+ local family="$2"
local last=""
# spawn nf_queue listeners
@@ -255,11 +257,13 @@ test_queue()
if [ x"$last" != x"$expected packets total" ]; then
echo "FAIL: Expected $expected packets total, but got $last" 1>&2
ip netns exec "$nsrouter" nft list ruleset
+ echo -n "$TMPFILE0: ";cat "$TMPFILE0"
+ echo -n "$TMPFILE1: ";cat "$TMPFILE1"
exit 1
fi
done
- echo "PASS: Expected and received $last"
+ echo "PASS: Expected and received $last ($family)"
}
listener_ready()
@@ -400,6 +404,8 @@ EOF
kill "$nfqpid"
echo "PASS: icmp+nfqueue via vrf"
+ ip -net "$ns1" link del tvrf
+ ip netns exec "$ns1" nft flush ruleset
}
sctp_listener_ready()
@@ -814,12 +820,53 @@ EOF
check_tainted "queue program exiting while packets queued"
}
+test_queue_bridge()
+{
+ ip -net "$nsrouter" addr flush dev veth0
+ ip -net "$nsrouter" addr flush dev veth1
+
+ ip -net "$nsrouter" link add br0 type bridge
+ ip -net "$nsrouter" link set veth0 master br0
+ ip -net "$nsrouter" link set veth1 master br0
+
+ ip -net "$nsrouter" link set br0 up
+
+ ip -net "$nsrouter" addr add 10.0.2.1/16 dev br0
+ ip -net "$nsrouter" addr add dead:2::1/64 dev br0 nodad
+
+ ip -net "$ns1" addr flush dev eth0
+ ip -net "$ns2" addr flush dev eth0
+
+ ip -net "$ns1" addr add 10.0.1.1/16 dev eth0
+ ip -net "$ns1" addr add dead:2::2/64 dev eth0 nodad
+
+ ip -net "$ns2" addr add 10.0.2.99/16 dev eth0
+ ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+
+ ip netns exec "$nsrouter" nft flush ruleset
+
+ ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=0 > /dev/null
+ ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=0 > /dev/null
+ ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=0 > /dev/null
+
+ if ! test_ping;then
+ echo "FAIL: netns bridge connectivity" 1>&2
+ exit $ret
+ fi
+
+ load_ruleset "bridge" "filter" 10
+ test_queue 10 "bridge"
+
+ load_ruleset "bridge" "filter2" 20
+ test_queue 20 "bridge"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
-load_ruleset "filter" 0
+load_ruleset "inet" "filter" 0
if test_ping; then
# queue bypass works (rules were skipped, no listener)
@@ -842,11 +889,11 @@ load_counter_ruleset 10
# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
# so we expect that userspace program receives 10 packets.
-test_queue 10
+test_queue 10 "inet"
# same. We queue to a second program as well.
-load_ruleset "filter2" 20
-test_queue 20
+load_ruleset "inet" "filter2" 20
+test_queue 20 "inet"
ip netns exec "$ns1" nft flush ruleset
test_tcp_forward
@@ -863,4 +910,7 @@ test_queue_stress
test_icmp_vrf
test_queue_removal
+# turns router into a bridge
+test_queue_bridge
+
exit $ret
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 9b9a3cb2700d..cbdd3ea28b99 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -997,6 +997,8 @@ TEST_F(tls, splice_short)
char sendbuf[0x100];
char sendchar = 'S';
int pipefds[2];
+ int pipe_sz;
+ int ret;
int i;
sendchar_iov.iov_base = &sendchar;
@@ -1005,7 +1007,11 @@ TEST_F(tls, splice_short)
memset(sendbuf, 's', sizeof(sendbuf));
ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0);
- ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0);
+ pipe_sz = (MAX_FRAGS + 1) * getpagesize();
+ ret = fcntl(pipefds[0], F_SETPIPE_SZ, pipe_sz);
+ if (ret < 0 && errno == EPERM)
+ SKIP(return, "insufficient pipe capacity");
+ ASSERT_GE(ret, pipe_sz);
for (i = 0; i < MAX_FRAGS; i++)
ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0);
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
index e8c02c64e03a..d04caa14202d 100755
--- a/tools/testing/selftests/net/vlan_bridge_binding.sh
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -64,7 +64,7 @@ check_operstate()
local expect=$1; shift
local operstate
- operstate=$(busywait 1000 \
+ operstate=$(busywait 2000 \
operstate_is "$dev" "$expect")
check_err $? "Got operstate of $operstate, expected $expect"
}
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
index 0f64b9b17081..a84709cabd8b 100644
--- a/tools/testing/selftests/perf_events/watermark_signal.c
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -102,7 +102,7 @@ TEST(watermark_signal)
}
p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (p == NULL) {
+ if (p == MAP_FAILED) {
perror("mmap");
goto cleanup;
}
diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c
index a734553718da..1026d8c400e1 100644
--- a/tools/testing/selftests/proc/proc-maps-race.c
+++ b/tools/testing/selftests/proc/proc-maps-race.c
@@ -17,8 +17,8 @@
*/
/*
* Fork a child that concurrently modifies address space while the main
- * process is reading /proc/$PID/maps and verifying the results. Address
- * space modifications include:
+ * process is reading /proc/$PID/maps and /proc/$PID/smaps, verifying the
+ * results. Address space modifications include:
* VMA splitting and merging
*
*/
@@ -39,6 +39,13 @@
#include <sys/types.h>
#include <sys/wait.h>
+#define min(a, b) \
+ ({ \
+ typeof(a) _a = (a); \
+ typeof(b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+
/* /proc/pid/maps parsing routines */
struct page_content {
char *data;
@@ -66,6 +73,11 @@ enum test_state {
TEST_DONE,
};
+enum maps_file {
+ MAPS,
+ SMAPS,
+};
+
struct vma_modifier_info;
FIXTURE(proc_maps_race)
@@ -76,7 +88,9 @@ FIXTURE(proc_maps_race)
struct line_content last_line;
struct line_content first_line;
unsigned long duration_sec;
+ enum maps_file maps_file;
int shared_mem_size;
+ int skip_pages;
int page_size;
int vma_count;
bool verbose;
@@ -84,6 +98,19 @@ FIXTURE(proc_maps_race)
pid_t pid;
};
+FIXTURE_VARIANT(proc_maps_race)
+{
+ const enum maps_file maps_file;
+};
+
+FIXTURE_VARIANT_ADD(proc_maps_race, maps) {
+ .maps_file = MAPS,
+};
+
+FIXTURE_VARIANT_ADD(proc_maps_race, smaps) {
+ .maps_file = SMAPS,
+};
+
typedef bool (*vma_modifier_op)(FIXTURE_DATA(proc_maps_race) *self);
typedef bool (*vma_mod_result_check_op)(struct line_content *mod_last_line,
struct line_content *mod_first_line,
@@ -105,38 +132,102 @@ struct vma_modifier_info {
void *child_mapped_addr[];
};
-
-static bool read_two_pages(FIXTURE_DATA(proc_maps_race) *self)
+static bool read_page(FIXTURE_DATA(proc_maps_race) *self,
+ struct page_content *page)
{
ssize_t bytes_read;
- if (lseek(self->maps_fd, 0, SEEK_SET) < 0)
+ bytes_read = read(self->maps_fd, page->data, self->page_size);
+ if (bytes_read <= 0)
return false;
- bytes_read = read(self->maps_fd, self->page1.data, self->page_size);
- if (bytes_read <= 0)
+ /* Make sure data always ends with a newline character. */
+ if (page->data[bytes_read - 1] != '\n')
return false;
- self->page1.size = bytes_read;
+ page->size = bytes_read;
- bytes_read = read(self->maps_fd, self->page2.data, self->page_size);
- if (bytes_read <= 0)
+ return true;
+}
+
+static bool parse_vma_line(char *line_start, char *line_end,
+ unsigned long *start, unsigned long *end)
+{
+ bool found;
+
+ *line_end = '\0'; /* stop sscanf at the EOL */
+ found = (sscanf(line_start, "%lx-%lx", start, end) == 2);
+ *line_end = '\n';
+
+ return found;
+}
+
+static int locate_containing_page(FIXTURE_DATA(proc_maps_race) *self,
+ unsigned long addr, unsigned long size)
+{
+ unsigned long start, end;
+ int page = 0;
+
+ if (lseek(self->maps_fd, 0, SEEK_SET) < 0)
+ return -1;
+
+ while (true) {
+ char *curr_pos;
+ char *end_pos;
+
+ if (!read_page(self, &self->page1))
+ return -1;
+
+ curr_pos = self->page1.data;
+ end_pos = self->page1.data + self->page1.size;
+ while (curr_pos < end_pos) {
+ char *line_end;
+
+ line_end = strchr(curr_pos, '\n');
+ if (!line_end)
+ break;
+
+ if (parse_vma_line(curr_pos, line_end, &start, &end) &&
+ start == addr && end == addr + size)
+ return page;
+
+ curr_pos = line_end + 1;
+ }
+ page++;
+ }
+
+ return 0;
+}
+
+static bool read_two_pages(FIXTURE_DATA(proc_maps_race) *self)
+{
+ if (lseek(self->maps_fd, 0, SEEK_SET) < 0)
return false;
- self->page2.size = bytes_read;
+ for (int i = 0; i < self->skip_pages; i++)
+ if (!read_page(self, &self->page1))
+ return false;
- return true;
+ return read_page(self, &self->page1) && read_page(self, &self->page2);
}
-static void copy_first_line(struct page_content *page, char *first_line)
+static void copy_line(const char *line_start, const char *line_end,
+ char *buf, size_t buf_size)
{
- char *pos = strchr(page->data, '\n');
+ size_t len = min(line_end - line_start, buf_size - 1);
- strncpy(first_line, page->data, pos - page->data);
- first_line[pos - page->data] = '\0';
+ strncpy(buf, line_start, len);
+ buf[len] = '\0';
}
-static void copy_last_line(struct page_content *page, char *last_line)
+static void copy_first_line(struct page_content *page, char *first_line,
+ size_t line_size)
+{
+ copy_line(page->data, strchr(page->data, '\n'), first_line, line_size);
+}
+
+static void copy_last_line(struct page_content *page, char *last_line,
+ size_t line_size)
{
/* Get the last line in the first page */
const char *end = page->data + page->size - 1;
@@ -146,8 +237,59 @@ static void copy_last_line(struct page_content *page, char *last_line)
/* search previous newline */
while (pos[-1] != '\n')
pos--;
- strncpy(last_line, pos, end - pos);
- last_line[end - pos] = '\0';
+
+ copy_line(pos, end, last_line, line_size);
+}
+
+static bool copy_first_entry(struct page_content *page, char *first_line,
+ size_t line_size)
+{
+ char *start_pos = page->data;
+
+ while (start_pos < page->data + page->size) {
+ unsigned long start_addr;
+ unsigned long end_addr;
+ char *end_pos;
+
+ end_pos = strchr(start_pos, '\n');
+ if (!end_pos)
+ break;
+
+ if (parse_vma_line(start_pos, end_pos, &start_addr, &end_addr)) {
+ copy_line(start_pos, end_pos, first_line, line_size);
+ return true;
+ }
+
+ start_pos = end_pos + 1;
+ }
+
+ return false;
+}
+
+static bool copy_last_entry(struct page_content *page, char *last_line,
+ size_t line_size)
+{
+ char *end_pos = page->data + page->size - 1;
+ char *start_pos;
+
+ while (end_pos > page->data) {
+ unsigned long start_addr;
+ unsigned long end_addr;
+
+ /* skip last newline */
+ start_pos = end_pos - 1;
+ /* search previous newline */
+ while (start_pos > page->data && start_pos[-1] != '\n')
+ start_pos--;
+ if (parse_vma_line(start_pos, end_pos, &start_addr, &end_addr)) {
+ copy_line(start_pos, end_pos, last_line, line_size);
+ return true;
+ }
+
+ end_pos = start_pos - 1;
+ }
+
+ return false;
}
/* Read the last line of the first page and the first line of the second page */
@@ -158,8 +300,16 @@ static bool read_boundary_lines(FIXTURE_DATA(proc_maps_race) *self,
if (!read_two_pages(self))
return false;
- copy_last_line(&self->page1, last_line->text);
- copy_first_line(&self->page2, first_line->text);
+ if (self->maps_file == MAPS) {
+ copy_last_line(&self->page1, last_line->text, LINE_MAX_SIZE);
+ copy_first_line(&self->page2, first_line->text, LINE_MAX_SIZE);
+ } else if (self->maps_file == SMAPS) {
+ if (!copy_last_entry(&self->page1, last_line->text, LINE_MAX_SIZE) ||
+ !copy_first_entry(&self->page2, first_line->text, LINE_MAX_SIZE))
+ return false;
+ } else {
+ return false;
+ }
return sscanf(last_line->text, "%lx-%lx", &last_line->start_addr,
&last_line->end_addr) == 2 &&
@@ -418,11 +568,14 @@ FIXTURE_SETUP(proc_maps_race)
struct vma_modifier_info *mod_info;
pthread_mutexattr_t mutex_attr;
pthread_condattr_t cond_attr;
+ unsigned long first_map_addr;
+ unsigned long last_map_addr;
unsigned long duration_sec;
char fname[32];
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
self->verbose = verbose && !strncmp(verbose, "1", 1);
+ self->maps_file = variant->maps_file;
duration_sec = duration ? atol(duration) : 0;
self->duration_sec = duration_sec ? duration_sec : 5UL;
@@ -489,7 +642,16 @@ FIXTURE_SETUP(proc_maps_race)
exit(0);
}
- sprintf(fname, "/proc/%d/maps", self->pid);
+ switch (self->maps_file) {
+ case MAPS:
+ sprintf(fname, "/proc/%d/maps", self->pid);
+ break;
+ case SMAPS:
+ sprintf(fname, "/proc/%d/smaps", self->pid);
+ break;
+ default:
+ ksft_exit_fail();
+ }
self->maps_fd = open(fname, O_RDONLY);
ASSERT_NE(self->maps_fd, -1);
@@ -502,6 +664,13 @@ FIXTURE_SETUP(proc_maps_race)
self->page2.data = malloc(self->page_size);
ASSERT_NE(self->page2.data, NULL);
+ first_map_addr = (unsigned long)mod_info->child_mapped_addr[0];
+ last_map_addr = (unsigned long)mod_info->child_mapped_addr[mod_info->vma_count - 1];
+
+ self->skip_pages = locate_containing_page(self,
+ min(first_map_addr, last_map_addr),
+ self->page_size * 3);
+ ASSERT_NE(self->skip_pages, -1);
ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line));
/*
@@ -527,7 +696,6 @@ FIXTURE_SETUP(proc_maps_race)
ASSERT_TRUE(mod_info->addr && mod_info->next_addr);
signal_state(mod_info, PARENT_READY);
-
}
FIXTURE_TEARDOWN(proc_maps_race)
@@ -617,20 +785,20 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split)
last_line_changed = strcmp(new_last_line.text, self->last_line.text) != 0;
first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0;
ASSERT_EQ(last_line_changed, first_line_changed);
-
- /* Check if PROCMAP_QUERY ioclt() finds the right VMA */
- ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size,
- &vma_start, &vma_end));
- /*
- * The vma at the split address can be either the same as
- * original one (if read before the split) or the same as the
- * first line in the second page (if read after the split).
- */
- ASSERT_TRUE((vma_start == self->last_line.start_addr &&
- vma_end == self->last_line.end_addr) ||
- (vma_start == split_first_line.start_addr &&
- vma_end == split_first_line.end_addr));
-
+ if (self->maps_file == MAPS) {
+ /* Check if PROCMAP_QUERY ioclt() finds the right VMA */
+ ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size,
+ &vma_start, &vma_end));
+ /*
+ * The vma at the split address can be either the same as
+ * original one (if read before the split) or the same as the
+ * first line in the second page (if read after the split).
+ */
+ ASSERT_TRUE((vma_start == self->last_line.start_addr &&
+ vma_end == self->last_line.end_addr) ||
+ (vma_start == split_first_line.start_addr &&
+ vma_end == split_first_line.end_addr));
+ }
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts);
end_test_iteration(&end_ts, self->verbose);
} while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec);
@@ -700,17 +868,18 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize)
strcmp(new_first_line.text, restored_first_line.text),
"Expand result invalid", self));
}
-
- /* Check if PROCMAP_QUERY ioclt() finds the right VMA */
- ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr, &vma_start, &vma_end));
- /*
- * The vma should stay at the same address and have either the
- * original size of 3 pages or 1 page if read after shrinking.
- */
- ASSERT_TRUE(vma_start == self->last_line.start_addr &&
- (vma_end - vma_start == self->page_size * 3 ||
- vma_end - vma_start == self->page_size));
-
+ if (self->maps_file == MAPS) {
+ /* Check if PROCMAP_QUERY ioclt() finds the right VMA */
+ ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr,
+ &vma_start, &vma_end));
+ /*
+ * The vma should stay at the same address and have either the
+ * original size of 3 pages or 1 page if read after shrinking.
+ */
+ ASSERT_TRUE(vma_start == self->last_line.start_addr &&
+ (vma_end - vma_start == self->page_size * 3 ||
+ vma_end - vma_start == self->page_size));
+ }
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts);
end_test_iteration(&end_ts, self->verbose);
} while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec);
@@ -780,20 +949,20 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap)
strcmp(new_first_line.text, restored_first_line.text),
"Remap restore result invalid", self));
}
-
- /* Check if PROCMAP_QUERY ioclt() finds the right VMA */
- ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size,
- &vma_start, &vma_end));
- /*
- * The vma should either stay at the same address and have the
- * original size of 3 pages or we should find the remapped vma
- * at the remap destination address with size of 1 page.
- */
- ASSERT_TRUE((vma_start == self->last_line.start_addr &&
- vma_end - vma_start == self->page_size * 3) ||
- (vma_start == self->last_line.start_addr + self->page_size &&
- vma_end - vma_start == self->page_size));
-
+ if (self->maps_file == MAPS) {
+ /* Check if PROCMAP_QUERY ioclt() finds the right VMA */
+ ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size,
+ &vma_start, &vma_end));
+ /*
+ * The vma should either stay at the same address and have the
+ * original size of 3 pages or we should find the remapped vma
+ * at the remap destination address with size of 1 page.
+ */
+ ASSERT_TRUE((vma_start == self->last_line.start_addr &&
+ vma_end - vma_start == self->page_size * 3) ||
+ (vma_start == self->last_line.start_addr + self->page_size &&
+ vma_end - vma_start == self->page_size));
+ }
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts);
end_test_iteration(&end_ts, self->verbose);
} while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec);
diff --git a/tools/testing/selftests/rdma/Makefile b/tools/testing/selftests/rdma/Makefile
index 7dd7cba7a73c..07af7f15c1bf 100644
--- a/tools/testing/selftests/rdma/Makefile
+++ b/tools/testing/selftests/rdma/Makefile
@@ -2,6 +2,7 @@
TEST_PROGS := rxe_rping_between_netns.sh \
rxe_ipv6.sh \
rxe_socket_with_netns.sh \
- rxe_test_NETDEV_UNREGISTER.sh
+ rxe_test_NETDEV_UNREGISTER.sh \
+ rxe_sent_rcvd_bytes.sh
include ../lib.mk
diff --git a/tools/testing/selftests/rdma/rxe_sent_rcvd_bytes.sh b/tools/testing/selftests/rdma/rxe_sent_rcvd_bytes.sh
new file mode 100755
index 000000000000..0e4fbfeebd22
--- /dev/null
+++ b/tools/testing/selftests/rdma/rxe_sent_rcvd_bytes.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Configuration
+PORT=4791
+MODS=("tun" "rdma_rxe")
+
+exec > /dev/null
+
+# --- Helper: Cleanup Routine ---
+cleanup() {
+ echo "Cleaning up resources..."
+ rdma link del rxe0 2>/dev/null
+ ip link del tun0 2>/dev/null
+ for m in "${MODS[@]}"; do modprobe -r "$m" 2>/dev/null; done
+}
+
+# Ensure cleanup runs on script exit or interrupt
+trap cleanup EXIT
+
+# --- Phase 1: Environment Check ---
+if [[ $EUID -ne 0 ]]; then
+ echo "Error: This script must be run as root."
+ exit 1
+fi
+
+for m in "${MODS[@]}"; do
+ modprobe "$m" || { echo "Error: Failed to load $m"; exit 1; }
+done
+
+# --- Phase 2: Create Interfaces & RXE Links ---
+echo "Creating tun0 (1.1.1.1) and rxe0..."
+ip tuntap add mode tun tun0
+ip addr add 1.1.1.1/24 dev tun0
+ip link set tun0 up
+rdma link add rxe0 type rxe netdev tun0
+
+# Verify port 4791 is listening
+if ! ss -Huln sport = :$PORT | grep -q ":$PORT"; then
+ echo "Error: UDP port $PORT not found after rxe0 creation"
+ exit 1
+fi
+
+orig_s=`cat /sys/class/infiniband/rxe0/ports/1/counters/port_xmit_data`
+orig_r=`cat /sys/class/infiniband/rxe0/ports/1/counters/port_rcv_data`
+
+rping -s -a 1.1.1.1 -C 3 -v &
+sleep 1
+rping -c -a 1.1.1.1 -C 3 -d -v
+
+new_s=`cat /sys/class/infiniband/rxe0/ports/1/counters/port_xmit_data`
+new_r=`cat /sys/class/infiniband/rxe0/ports/1/counters/port_rcv_data`
+
+echo sent $new_s $orig_s
+echo rcvd $new_r $orig_r
+
+result0=$((new_s - orig_s))
+result1=$((new_r - orig_r))
+
+if [ $result0 != $result1 ]; then
+ echo "Error: sent and rcvd bytes different"
+ echo $result0
+ echo $result1
+ exit 1
+fi
+
+echo "Deleting rxe0..."
+rdma link del rxe0
+
+# Port should now be gone
+if ss -Huln sport = :$PORT | grep -q ":$PORT"; then
+ echo "Error: UDP port $PORT still exists after all links deleted"
+ exit 1
+fi
+
+echo "Test passed successfully."
diff --git a/tools/testing/selftests/riscv/abi/Makefile b/tools/testing/selftests/riscv/abi/Makefile
index ed82ff9c664e..041114675ad5 100644
--- a/tools/testing/selftests/riscv/abi/Makefile
+++ b/tools/testing/selftests/riscv/abi/Makefile
@@ -7,4 +7,4 @@ TEST_GEN_PROGS := pointer_masking
include ../../lib.mk
$(OUTPUT)/pointer_masking: pointer_masking.c
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/cfi/Makefile b/tools/testing/selftests/riscv/cfi/Makefile
index 93b4738c0e2e..418b4b5325a5 100644
--- a/tools/testing/selftests/riscv/cfi/Makefile
+++ b/tools/testing/selftests/riscv/cfi/Makefile
@@ -16,7 +16,7 @@ ifeq ($(shell $(CC) $(CFLAGS) -nostdlib -xc /dev/null -o /dev/null > /dev/null 2
TEST_GEN_PROGS := cfitests
$(OUTPUT)/cfitests: cfitests.c shadowstack.c
- $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^
else
$(shell echo "Toolchain doesn't support CFI, skipping CFI kselftest." >&2)
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
index cec81610a5f2..71e3f26c541b 100644
--- a/tools/testing/selftests/riscv/hwprobe/Makefile
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -9,10 +9,10 @@ TEST_GEN_PROGS := hwprobe cbo which-cpus
include ../../lib.mk
$(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
$(OUTPUT)/which-cpus: which-cpus.c sys_hwprobe.S
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile
index 4664ed79e20b..24122453e3d0 100644
--- a/tools/testing/selftests/riscv/mm/Makefile
+++ b/tools/testing/selftests/riscv/mm/Makefile
@@ -12,4 +12,4 @@ TEST_PROGS := run_mmap.sh
include ../../lib.mk
$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h
- $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/sigreturn/Makefile b/tools/testing/selftests/riscv/sigreturn/Makefile
index eb8bac9279a8..8c77508641f3 100644
--- a/tools/testing/selftests/riscv/sigreturn/Makefile
+++ b/tools/testing/selftests/riscv/sigreturn/Makefile
@@ -9,4 +9,4 @@ TEST_GEN_PROGS := sigreturn
include ../../lib.mk
$(OUTPUT)/sigreturn: sigreturn.c
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index 326dafd739bf..7e0017b3fb8b 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -11,29 +11,29 @@ include ../../lib.mk
TEST_GEN_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(TEST_GEN_LIBS))
$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S
- $(CC) -static -c -o$@ $(CFLAGS) $^
+ $(CC) -static -c -o $@ $(CFLAGS) $^
$(OUTPUT)/v_helpers.o: v_helpers.c
- $(CC) -static -c -o$@ $(CFLAGS) $^
+ $(CC) -static -c -o $@ $(CFLAGS) $^
$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
$(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c
$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
$(OUTPUT)/vstate_ptrace: vstate_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
$(OUTPUT)/validate_v_ptrace: validate_v_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
- $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
EXTRA_CLEAN += $(TEST_GEN_OBJ)
diff --git a/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c
index 9f16d39255e7..0d6fcc8e5eb6 100644
--- a/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c
+++ b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c
@@ -9,12 +9,7 @@
* Copyright (C) 2026 Cheng-Yang Chou <yphbchou0911@gmail.com>
*/
-#include <vmlinux.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-/* SCX kfunc from scx_kfunc_ids_any set */
-void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
+#include <scx/common.bpf.h>
SEC("struct_ops/ssthresh")
__u32 BPF_PROG(tcp_ca_ssthresh, struct sock *sk)
diff --git a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
index 7f23fb17b1e0..9e802b52b29e 100644
--- a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
@@ -95,7 +95,7 @@ static int scan_dsq_pool(void)
record_peek_result(task->pid);
/* Try to move this task to local */
- if (!moved && scx_bpf_dsq_move_to_local(dsq_id, 0) == 0) {
+ if (!moved && scx_bpf_dsq_move_to_local(dsq_id, 0)) {
moved = 1;
break;
}
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
index 5b6e045e1109..7e342c0cec65 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
@@ -6,6 +6,7 @@
*/
#include <bpf/bpf.h>
#include <scx/common.h>
+#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
#include "select_cpu_dfl.bpf.skel.h"
@@ -13,29 +14,44 @@
#define NUM_CHILDREN 1028
+struct select_cpu_dfl_ctx {
+ struct select_cpu_dfl *skel;
+ struct bpf_link *link;
+};
+
static enum scx_test_status setup(void **ctx)
{
- struct select_cpu_dfl *skel;
+ struct select_cpu_dfl_ctx *tctx;
+
+ tctx = malloc(sizeof(*tctx));
+ SCX_FAIL_IF(!tctx, "Failed to allocate test context");
+ tctx->link = NULL;
- skel = select_cpu_dfl__open();
- SCX_FAIL_IF(!skel, "Failed to open");
- SCX_ENUM_INIT(skel);
- SCX_FAIL_IF(select_cpu_dfl__load(skel), "Failed to load skel");
+ tctx->skel = select_cpu_dfl__open();
+ if (!tctx->skel) {
+ free(tctx);
+ SCX_FAIL("Failed to open");
+ }
+ SCX_ENUM_INIT(tctx->skel);
+ if (select_cpu_dfl__load(tctx->skel)) {
+ select_cpu_dfl__destroy(tctx->skel);
+ free(tctx);
+ SCX_FAIL("Failed to load skel");
+ }
- *ctx = skel;
+ *ctx = tctx;
return SCX_TEST_PASS;
}
static enum scx_test_status run(void *ctx)
{
- struct select_cpu_dfl *skel = ctx;
- struct bpf_link *link;
+ struct select_cpu_dfl_ctx *tctx = ctx;
pid_t pids[NUM_CHILDREN];
- int i, status;
+ int i, status, nforked = 0;
- link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dfl_ops);
- SCX_FAIL_IF(!link, "Failed to attach scheduler");
+ tctx->link = bpf_map__attach_struct_ops(tctx->skel->maps.select_cpu_dfl_ops);
+ SCX_FAIL_IF(!tctx->link, "Failed to attach scheduler");
for (i = 0; i < NUM_CHILDREN; i++) {
pids[i] = fork();
@@ -43,25 +59,31 @@ static enum scx_test_status run(void *ctx)
sleep(1);
exit(0);
}
+ if (pids[i] > 0)
+ nforked++;
}
for (i = 0; i < NUM_CHILDREN; i++) {
+ if (pids[i] <= 0)
+ continue;
SCX_EQ(waitpid(pids[i], &status, 0), pids[i]);
SCX_EQ(status, 0);
}
- SCX_ASSERT(!skel->bss->saw_local);
-
- bpf_link__destroy(link);
+ SCX_GT(nforked, 0);
+ SCX_ASSERT(!tctx->skel->bss->saw_local);
return SCX_TEST_PASS;
}
static void cleanup(void *ctx)
{
- struct select_cpu_dfl *skel = ctx;
+ struct select_cpu_dfl_ctx *tctx = ctx;
- select_cpu_dfl__destroy(skel);
+ if (tctx->link)
+ bpf_link__destroy(tctx->link);
+ select_cpu_dfl__destroy(tctx->skel);
+ free(tctx);
}
struct scx_test select_cpu_dfl = {
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 33bb8f3ff8ed..da65f838bd52 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -664,5 +664,43 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress_block 21 clsact"
]
+ },
+ {
+ "id": "9c2a",
+ "name": "Act_ct preserves skb cb across defrag before prio dequeue",
+ "category": [
+ "actions",
+ "ct",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: prio",
+ "$TC qdisc add dev $DUMMY clsact",
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 1 matchall action mirred egress redirect dev $DUMMY"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 1 matchall action ct zone 1 pipe",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "[Ether()/frag for frag in fragment(IP(src='10.0.0.10', dst='10.0.0.1', id=1)/UDP(sport=12345, dport=9)/Raw(b'A' * 4000), fragsize=1400)]"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 1 '^qdisc prio 1:'",
+ "matchPattern": "Sent [1-9][0-9]* bytes [1-9][0-9]* pkt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact",
+ "$TC qdisc del dev $DUMMY clsact",
+ "$TC qdisc del dev $DUMMY root handle 1:"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
index cd1f2ee8f354..ed6a900bb568 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
@@ -250,5 +250,49 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "891f",
+ "name": "Verify DualPI2 GSO backlog accounting with QFQ parent",
+ "category": [
+ "qdisc",
+ "dualpi2",
+ "qfq",
+ "gso"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 1 maxpkt 4096",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: dualpi2",
+ "$TC filter add dev $DUMMY parent 1: matchall classid 1:1"
+ ],
+ "cmdUnderTest": "./tdc_gso.py 10.10.10.10 10.10.10.1 9000 1200 2400",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY",
+ "matchJSON": [
+ {
+ "kind": "qfq",
+ "handle": "1:",
+ "packets": 2,
+ "backlog": 0,
+ "qlen": 0
+ },
+ {
+ "kind": "dualpi2",
+ "handle": "2:",
+ "packets": 2,
+ "backlog": 0,
+ "qlen": 0
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc_gso.py b/tools/testing/selftests/tc-testing/tdc_gso.py
new file mode 100755
index 000000000000..b66528ea4b68
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_gso.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+tdc_gso.py - send a UDP GSO datagram
+
+Copyright (C) 2026 Xingquan Liu <b1n@b1n.io>
+"""
+
+import argparse
+import socket
+import struct
+import sys
+
+UDP_MAX_SEGMENTS = 1 << 7
+
+
+parser = argparse.ArgumentParser(description="UDP GSO datagram sender")
+parser.add_argument("src", help="source IPv4 address")
+parser.add_argument("dst", help="destination IPv4 address")
+parser.add_argument("port", type=int, help="destination UDP port")
+parser.add_argument("gso_size", type=int, help="UDP GSO segment payload size")
+parser.add_argument("payload_len", type=int, help="total UDP payload length")
+args = parser.parse_args()
+
+if args.gso_size <= 0 or args.gso_size > 0xFFFF:
+ parser.error("gso_size must fit in an unsigned 16-bit integer")
+if args.payload_len <= args.gso_size:
+ parser.error("payload_len must be larger than gso_size")
+if args.payload_len > args.gso_size * UDP_MAX_SEGMENTS:
+ parser.error("payload_len exceeds UDP_MAX_SEGMENTS")
+
+SOL_UDP = getattr(socket, "SOL_UDP", socket.IPPROTO_UDP)
+UDP_SEGMENT = getattr(socket, "UDP_SEGMENT", 103)
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+sock.bind((args.src, 0))
+
+payload = b"b" * args.payload_len
+cmsg = [(SOL_UDP, UDP_SEGMENT, struct.pack("=H", args.gso_size))]
+
+sent = sock.sendmsg([payload], cmsg, 0, (args.dst, args.port))
+sys.exit(sent != len(payload))
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index 974b076f9235..33a09f66d7e2 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -22,7 +22,7 @@
#include "kselftest_harness.h"
#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
-#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_BUFFER_SIZE (1024 * 1024)
#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
#define __UEVENT_LISTEN_ALL -1
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 0684932d91bf..e6e8cb52ab03 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -1,6 +1,6 @@
ARCH ?= $(shell uname -m)
-ifeq (,$(filter $(ARCH),aarch64 arm64 x86_64))
+ifeq (,$(filter $(ARCH),aarch64 arm64 x86 x86_64))
# Do nothing on unsupported architectures
include ../lib.mk
else
@@ -12,6 +12,7 @@ TEST_GEN_PROGS += vfio_iommufd_setup_test
TEST_GEN_PROGS += vfio_pci_device_test
TEST_GEN_PROGS += vfio_pci_device_init_perf_test
TEST_GEN_PROGS += vfio_pci_driver_test
+TEST_GEN_PROGS += vfio_pci_sriov_uapi_test
TEST_FILES += scripts/cleanup.sh
TEST_FILES += scripts/lib.sh
@@ -23,14 +24,20 @@ include lib/libvfio.mk
CFLAGS += -I$(top_srcdir)/tools/include
CFLAGS += -MD
+CFLAGS += -Wall -Werror
CFLAGS += $(EXTRA_CFLAGS)
LDFLAGS += -pthread
-$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O)
+LDLIBS += -luuid
+
+$(TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(LIBVFIO_O)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@
TEST_GEN_PROGS_O = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+$(TEST_GEN_PROGS_O): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
-include $(TEST_DEP_FILES)
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
index 1b6da54cc2cb..07862b470777 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -5,6 +5,7 @@
#include <libvfio/assert.h>
#include <libvfio/iommu.h>
#include <libvfio/iova_allocator.h>
+#include <libvfio/sysfs.h>
#include <libvfio/vfio_pci_device.h>
#include <libvfio/vfio_pci_driver.h>
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/assert.h b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
index f4ebd122d9b6..77b68c7129a6 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
@@ -51,4 +51,9 @@
VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
} while (0)
+#define snprintf_assert(_s, _size, _fmt, ...) do { \
+ int __ret = snprintf(_s, _size, _fmt, ##__VA_ARGS__); \
+ VFIO_ASSERT_LT(__ret, _size); \
+} while (0)
+
#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/sysfs.h b/tools/testing/selftests/vfio/lib/include/libvfio/sysfs.h
new file mode 100644
index 000000000000..c9ab1ea8f5a9
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/sysfs.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_SYSFS_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_SYSFS_H
+
+int sysfs_sriov_totalvfs_get(const char *bdf);
+int sysfs_sriov_numvfs_get(const char *bdf);
+void sysfs_sriov_numvfs_set(const char *bdf, int numvfs);
+char *sysfs_sriov_vf_bdf_get(const char *pf_bdf, int i);
+int sysfs_iommu_group_get(const char *bdf);
+char *sysfs_driver_get(const char *bdf);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_SYSFS_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
index 2858885a89bb..3eabead717bb 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
@@ -38,6 +38,8 @@ struct vfio_pci_device {
#define dev_info(_dev, _fmt, ...) printf("%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
#define dev_err(_dev, _fmt, ...) fprintf(stderr, "%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+struct vfio_pci_device *vfio_pci_device_alloc(const char *bdf, struct iommu *iommu);
+void vfio_pci_device_free(struct vfio_pci_device *device);
struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu);
void vfio_pci_device_cleanup(struct vfio_pci_device *device);
@@ -122,4 +124,13 @@ static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
const char *vfio_pci_get_cdev_path(const char *bdf);
+void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf);
+void __vfio_pci_group_get_device_fd(struct vfio_pci_device *device,
+ const char *bdf, const char *vf_token);
+void vfio_container_set_iommu(struct vfio_pci_device *device);
+void vfio_pci_cdev_open(struct vfio_pci_device *device, const char *bdf);
+int __vfio_device_bind_iommufd(int device_fd, int iommufd, const char *vf_token);
+
+void vfio_device_set_vf_token(int fd, const char *vf_token);
+
#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H */
diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk
index 9f47bceed16f..2b8d73b7d329 100644
--- a/tools/testing/selftests/vfio/lib/libvfio.mk
+++ b/tools/testing/selftests/vfio/lib/libvfio.mk
@@ -6,6 +6,7 @@ LIBVFIO_SRCDIR := $(selfdir)/vfio/lib
LIBVFIO_C := iommu.c
LIBVFIO_C += iova_allocator.c
LIBVFIO_C += libvfio.c
+LIBVFIO_C += sysfs.c
LIBVFIO_C += vfio_pci_device.c
LIBVFIO_C += vfio_pci_driver.c
@@ -19,11 +20,13 @@ LIBVFIO_OUTPUT := $(OUTPUT)/libvfio
LIBVFIO_O := $(patsubst %.c, $(LIBVFIO_OUTPUT)/%.o, $(LIBVFIO_C))
LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq)
-$(shell mkdir -p $(LIBVFIO_O_DIRS))
+
+$(LIBVFIO_O_DIRS):
+ mkdir -p $@
CFLAGS += -I$(LIBVFIO_SRCDIR)/include
-$(LIBVFIO_O): $(LIBVFIO_OUTPUT)/%.o : $(LIBVFIO_SRCDIR)/%.c
+$(LIBVFIO_O): $(LIBVFIO_OUTPUT)/%.o : $(LIBVFIO_SRCDIR)/%.c | $(LIBVFIO_O_DIRS)
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
EXTRA_CLEAN += $(LIBVFIO_OUTPUT)
diff --git a/tools/testing/selftests/vfio/lib/sysfs.c b/tools/testing/selftests/vfio/lib/sysfs.c
new file mode 100644
index 000000000000..11415448b2e2
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/sysfs.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/limits.h>
+
+#include <libvfio.h>
+
+#define readlink_safe(_path, _buf) ({ \
+ int __ret; \
+ \
+ _Static_assert(!__builtin_types_compatible_p( \
+ __typeof__(_buf), char *), \
+ "readlink_safe: _buf must be an array, not a pointer"); \
+ \
+ __ret = readlink(_path, _buf, sizeof(_buf) - 1); \
+ if (__ret != -1) \
+ _buf[__ret] = 0; \
+ __ret; \
+})
+
+static void readlink_base(const char *path, const char *data_fmt, void *out_data)
+{
+ char rl_path[PATH_MAX];
+ int ret;
+
+ ret = readlink_safe(path, rl_path);
+ VFIO_ASSERT_NE(ret, -1);
+
+ ret = sscanf(basename(rl_path), data_fmt, out_data);
+ VFIO_ASSERT_EQ(ret, 1);
+}
+
+static int sysfs_val_get_int(const char *component, const char *name,
+ const char *file)
+{
+ char path[PATH_MAX];
+ char buf[32];
+ int ret;
+ int fd;
+
+ snprintf_assert(path, PATH_MAX, "/sys/bus/pci/%s/%s/%s", component, name, file);
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ VFIO_ASSERT_GT(read(fd, buf, ARRAY_SIZE(buf)), 0);
+ VFIO_ASSERT_EQ(close(fd), 0);
+
+ errno = 0;
+ ret = strtol(buf, NULL, 0);
+ VFIO_ASSERT_EQ(errno, 0, "sysfs path \"%s\" is not an integer: \"%s\"\n", path, buf);
+
+ return ret;
+}
+
+static void sysfs_val_set(const char *component, const char *name,
+ const char *file, const char *val)
+{
+ char path[PATH_MAX];
+ int fd;
+
+ snprintf_assert(path, PATH_MAX, "/sys/bus/pci/%s/%s/%s", component, name, file);
+ VFIO_ASSERT_GT(fd = open(path, O_WRONLY), 0);
+
+ VFIO_ASSERT_EQ(write(fd, val, strlen(val)), strlen(val));
+ VFIO_ASSERT_EQ(close(fd), 0);
+}
+
+static int sysfs_device_val_get(const char *bdf, const char *file)
+{
+ return sysfs_val_get_int("devices", bdf, file);
+}
+
+static void sysfs_device_val_set(const char *bdf, const char *file, const char *val)
+{
+ sysfs_val_set("devices", bdf, file, val);
+}
+
+static void sysfs_device_val_set_int(const char *bdf, const char *file, int val)
+{
+ char val_str[32];
+
+ snprintf_assert(val_str, sizeof(val_str), "%d", val);
+ sysfs_device_val_set(bdf, file, val_str);
+}
+
+int sysfs_sriov_totalvfs_get(const char *bdf)
+{
+ return sysfs_device_val_get(bdf, "sriov_totalvfs");
+}
+
+int sysfs_sriov_numvfs_get(const char *bdf)
+{
+ return sysfs_device_val_get(bdf, "sriov_numvfs");
+}
+
+void sysfs_sriov_numvfs_set(const char *bdf, int numvfs)
+{
+ sysfs_device_val_set_int(bdf, "sriov_numvfs", numvfs);
+}
+
+char *sysfs_sriov_vf_bdf_get(const char *pf_bdf, int i)
+{
+ char path[PATH_MAX];
+ char *out_vf_bdf;
+
+ /* Fit "0000:00:00.0" */
+ out_vf_bdf = calloc(16, sizeof(char));
+ VFIO_ASSERT_NOT_NULL(out_vf_bdf);
+
+ snprintf_assert(path, PATH_MAX, "/sys/bus/pci/devices/%s/virtfn%d", pf_bdf, i);
+ readlink_base(path, "%s", out_vf_bdf);
+
+ return out_vf_bdf;
+}
+
+int sysfs_iommu_group_get(const char *bdf)
+{
+ char path[PATH_MAX];
+ int group;
+
+ snprintf_assert(path, PATH_MAX, "/sys/bus/pci/devices/%s/iommu_group", bdf);
+ readlink_base(path, "%d", &group);
+
+ return group;
+}
+
+char *sysfs_driver_get(const char *bdf)
+{
+ char driver_path[PATH_MAX];
+ char path[PATH_MAX];
+ char *out_driver;
+ int ret;
+
+ snprintf_assert(path, PATH_MAX, "/sys/bus/pci/devices/%s/driver", bdf);
+ ret = readlink_safe(path, driver_path);
+ if (ret == -1) {
+ if (errno == ENOENT)
+ return NULL;
+
+ VFIO_FAIL("Failed to read %s\n", path);
+ }
+
+ out_driver = strdup(basename(driver_path));
+ VFIO_ASSERT_NOT_NULL(out_driver);
+
+ return out_driver;
+}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index fc75e04ef010..94dc5fcecbeb 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -22,11 +22,11 @@
#include <linux/types.h>
#include <linux/vfio.h>
+#include <uuid/uuid.h>
+
#include "kselftest.h"
#include <libvfio.h>
-#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
-
static void vfio_pci_irq_set(struct vfio_pci_device *device,
u32 index, u32 vector, u32 count, int *fds)
{
@@ -115,6 +115,40 @@ static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}
+static int vfio_device_feature_ioctl(int fd, u32 flags, void *data,
+ size_t data_size)
+{
+ u8 buffer[sizeof(struct vfio_device_feature) + data_size] = {};
+ struct vfio_device_feature *feature = (void *)buffer;
+
+ memcpy(feature->data, data, data_size);
+
+ feature->argsz = sizeof(buffer);
+ feature->flags = flags;
+
+ return ioctl(fd, VFIO_DEVICE_FEATURE, feature);
+}
+
+static void vfio_device_feature_set(int fd, u16 feature, void *data, size_t data_size)
+{
+ u32 flags = VFIO_DEVICE_FEATURE_SET | feature;
+ int ret;
+
+ ret = vfio_device_feature_ioctl(fd, flags, data, data_size);
+ VFIO_ASSERT_EQ(ret, 0, "Failed to set feature %u\n", feature);
+}
+
+void vfio_device_set_vf_token(int fd, const char *vf_token)
+{
+ uuid_t token_uuid = {0};
+
+ VFIO_ASSERT_NOT_NULL(vf_token, "vf_token is NULL");
+ VFIO_ASSERT_EQ(uuid_parse(vf_token, token_uuid), 0);
+
+ vfio_device_feature_set(fd, VFIO_DEVICE_FEATURE_PCI_VF_TOKEN,
+ token_uuid, sizeof(uuid_t));
+}
+
static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
struct vfio_region_info *info)
{
@@ -204,25 +238,7 @@ void vfio_pci_device_reset(struct vfio_pci_device *device)
ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
}
-static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
-{
- char dev_iommu_group_path[PATH_MAX] = {0};
- char sysfs_path[PATH_MAX] = {0};
- unsigned int group;
- int ret;
-
- snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);
-
- ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
- VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);
-
- ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
- VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);
-
- return group;
-}
-
-static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
+void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
{
struct vfio_group_status group_status = {
.argsz = sizeof(group_status),
@@ -230,8 +246,8 @@ static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf
char group_path[32];
int group;
- group = vfio_pci_get_group_from_dev(bdf);
- snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);
+ group = sysfs_iommu_group_get(bdf);
+ snprintf_assert(group_path, sizeof(group_path), "/dev/vfio/%d", group);
device->group_fd = open(group_path, O_RDWR);
VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);
@@ -242,14 +258,37 @@ static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf
ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
}
-static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
+void __vfio_pci_group_get_device_fd(struct vfio_pci_device *device,
+ const char *bdf, const char *vf_token)
+{
+ char arg[64];
+
+ /*
+ * If a vf_token exists, argument to VFIO_GROUP_GET_DEVICE_FD
+ * will be in the form of the following example:
+ * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
+ */
+ if (vf_token)
+ snprintf_assert(arg, ARRAY_SIZE(arg), "%s vf_token=%s", bdf, vf_token);
+ else
+ snprintf_assert(arg, ARRAY_SIZE(arg), "%s", bdf);
+
+ device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, arg);
+}
+
+static void vfio_pci_group_get_device_fd(struct vfio_pci_device *device,
+ const char *bdf, const char *vf_token)
+{
+ __vfio_pci_group_get_device_fd(device, bdf, vf_token);
+ VFIO_ASSERT_GE(device->fd, 0);
+}
+
+void vfio_container_set_iommu(struct vfio_pci_device *device)
{
struct iommu *iommu = device->iommu;
unsigned long iommu_type = iommu->mode->iommu_type;
int ret;
- vfio_pci_group_setup(device, bdf);
-
ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
@@ -259,9 +298,14 @@ static void vfio_pci_container_setup(struct vfio_pci_device *device, const char
* because the IOMMU type is already set.
*/
(void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
+}
- device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
- VFIO_ASSERT_GE(device->fd, 0);
+static void vfio_pci_container_setup(struct vfio_pci_device *device,
+ const char *bdf, const char *vf_token)
+{
+ vfio_pci_group_setup(device, bdf);
+ vfio_container_set_iommu(device);
+ vfio_pci_group_get_device_fd(device, bdf, vf_token);
}
static void vfio_pci_device_setup(struct vfio_pci_device *device)
@@ -302,7 +346,7 @@ const char *vfio_pci_get_cdev_path(const char *bdf)
cdev_path = calloc(PATH_MAX, 1);
VFIO_ASSERT_NOT_NULL(cdev_path);
- snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
+ snprintf_assert(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
dir = opendir(dir_path);
VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);
@@ -312,7 +356,7 @@ const char *vfio_pci_get_cdev_path(const char *bdf)
if (strncmp("vfio", entry->d_name, 4))
continue;
- snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
+ snprintf_assert(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
break;
}
@@ -322,14 +366,32 @@ const char *vfio_pci_get_cdev_path(const char *bdf)
return cdev_path;
}
-static void vfio_device_bind_iommufd(int device_fd, int iommufd)
+int __vfio_device_bind_iommufd(int device_fd, int iommufd, const char *vf_token)
{
struct vfio_device_bind_iommufd args = {
.argsz = sizeof(args),
.iommufd = iommufd,
};
+ uuid_t token_uuid;
+
+ if (vf_token) {
+ VFIO_ASSERT_EQ(uuid_parse(vf_token, token_uuid), 0);
+ args.flags |= VFIO_DEVICE_BIND_FLAG_TOKEN;
+ args.token_uuid_ptr = (u64)token_uuid;
+ }
+
+ if (ioctl(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args))
+ return -errno;
+
+ return 0;
+}
+
+static void vfio_device_bind_iommufd(int device_fd, int iommufd,
+ const char *vf_token)
+{
+ int ret = __vfio_device_bind_iommufd(device_fd, iommufd, vf_token);
- ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
+ VFIO_ASSERT_EQ(ret, 0, "Failed VFIO_DEVICE_BIND_IOMMUFD ioctl\n");
}
static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
@@ -342,19 +404,24 @@ static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
}
-static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
+void vfio_pci_cdev_open(struct vfio_pci_device *device, const char *bdf)
{
const char *cdev_path = vfio_pci_get_cdev_path(bdf);
device->fd = open(cdev_path, O_RDWR);
VFIO_ASSERT_GE(device->fd, 0);
free((void *)cdev_path);
+}
- vfio_device_bind_iommufd(device->fd, device->iommu->iommufd);
+static void vfio_pci_iommufd_setup(struct vfio_pci_device *device,
+ const char *bdf, const char *vf_token)
+{
+ vfio_pci_cdev_open(device, bdf);
+ vfio_device_bind_iommufd(device->fd, device->iommu->iommufd, vf_token);
vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
}
-struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
+struct vfio_pci_device *vfio_pci_device_alloc(const char *bdf, struct iommu *iommu)
{
struct vfio_pci_device *device;
@@ -365,10 +432,24 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iomm
device->iommu = iommu;
device->bdf = bdf;
+ return device;
+}
+
+void vfio_pci_device_free(struct vfio_pci_device *device)
+{
+ free(device);
+}
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
+{
+ struct vfio_pci_device *device;
+
+ device = vfio_pci_device_alloc(bdf, iommu);
+
if (iommu->mode->container_path)
- vfio_pci_container_setup(device, bdf);
+ vfio_pci_container_setup(device, bdf, NULL);
else
- vfio_pci_iommufd_setup(device, bdf);
+ vfio_pci_iommufd_setup(device, bdf, NULL);
vfio_pci_device_setup(device);
vfio_pci_driver_probe(device);
@@ -397,5 +478,5 @@ void vfio_pci_device_cleanup(struct vfio_pci_device *device)
if (device->group_fd)
VFIO_ASSERT_EQ(close(device->group_fd), 0);
- free(device);
+ vfio_pci_device_free(device);
}
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index abb170bdcef7..7d0de8c79de1 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -44,9 +44,9 @@ static int intel_iommu_mapping_get(const char *bdf, u64 iova,
FILE *file;
char *rest;
- snprintf(iommu_mapping_path, sizeof(iommu_mapping_path),
- "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct",
- bdf);
+ snprintf_assert(iommu_mapping_path, sizeof(iommu_mapping_path),
+ "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct",
+ bdf);
printf("Searching for IOVA 0x%lx in %s\n", iova, iommu_mapping_path);
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c
index 7c0fe8ce3a61..93c11fd5e081 100644
--- a/tools/testing/selftests/vfio/vfio_pci_device_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c
@@ -39,16 +39,17 @@ FIXTURE_TEARDOWN(vfio_pci_device_test)
iommu_cleanup(self->iommu);
}
-#define read_pci_id_from_sysfs(_file) ({ \
- char __sysfs_path[PATH_MAX]; \
- char __buf[32]; \
- int __fd; \
- \
- snprintf(__sysfs_path, PATH_MAX, "/sys/bus/pci/devices/%s/%s", device_bdf, _file); \
- ASSERT_GT((__fd = open(__sysfs_path, O_RDONLY)), 0); \
- ASSERT_GT(read(__fd, __buf, ARRAY_SIZE(__buf)), 0); \
- ASSERT_EQ(0, close(__fd)); \
- (u16)strtoul(__buf, NULL, 0); \
+#define read_pci_id_from_sysfs(_file) ({ \
+ char __sysfs_path[PATH_MAX]; \
+ char __buf[32]; \
+ int __fd; \
+ \
+ snprintf_assert(__sysfs_path, PATH_MAX, "/sys/bus/pci/devices/%s/%s", \
+ device_bdf, _file); \
+ ASSERT_GT((__fd = open(__sysfs_path, O_RDONLY)), 0); \
+ ASSERT_GT(read(__fd, __buf, ARRAY_SIZE(__buf)), 0); \
+ ASSERT_EQ(0, close(__fd)); \
+ (u16)strtoul(__buf, NULL, 0); \
})
TEST_F(vfio_pci_device_test, config_space_read_write)
diff --git a/tools/testing/selftests/vfio/vfio_pci_sriov_uapi_test.c b/tools/testing/selftests/vfio/vfio_pci_sriov_uapi_test.c
new file mode 100644
index 000000000000..19d657d00b75
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_sriov_uapi_test.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "lib/include/libvfio/assert.h"
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/limits.h>
+
+#include <libvfio.h>
+
+#include "../kselftest_harness.h"
+
+#define UUID_1 "52ac9bff-3a88-4fbd-901a-0d767c3b6c97"
+#define UUID_2 "88594674-90a0-47a9-aea8-9d9b352ac08a"
+
+static const char *pf_bdf;
+static char *vf_bdf;
+
+static pid_t main_pid;
+
+static int container_setup(struct vfio_pci_device *device, const char *bdf,
+ const char *vf_token)
+{
+ vfio_pci_group_setup(device, bdf);
+ vfio_container_set_iommu(device);
+ __vfio_pci_group_get_device_fd(device, bdf, vf_token);
+
+ /* The device fd will be -1 in case of mismatched tokens */
+ return (device->fd < 0);
+}
+
+static int iommufd_setup(struct vfio_pci_device *device, const char *bdf,
+ const char *vf_token)
+{
+ vfio_pci_cdev_open(device, bdf);
+ return __vfio_device_bind_iommufd(device->fd,
+ device->iommu->iommufd, vf_token);
+}
+
+static int device_init(const char *bdf, struct iommu *iommu,
+ const char *vf_token, struct vfio_pci_device **out_dev)
+{
+ struct vfio_pci_device *device = vfio_pci_device_alloc(bdf, iommu);
+ int ret;
+
+ if (iommu->mode->container_path)
+ ret = container_setup(device, bdf, vf_token);
+ else
+ ret = iommufd_setup(device, bdf, vf_token);
+
+ *out_dev = device;
+ return ret;
+}
+
+static void device_cleanup(struct vfio_pci_device *device)
+{
+ if (!device)
+ return;
+
+ if (device->fd > 0)
+ VFIO_ASSERT_EQ(close(device->fd), 0);
+
+ if (device->group_fd)
+ VFIO_ASSERT_EQ(close(device->group_fd), 0);
+
+ vfio_pci_device_free(device);
+}
+
+FIXTURE(vfio_pci_sriov_uapi_test) {
+ struct vfio_pci_device *pf;
+ struct vfio_pci_device *vf;
+ struct iommu *iommu;
+ char *pf_token;
+};
+
+FIXTURE_VARIANT(vfio_pci_sriov_uapi_test) {
+ const char *iommu_mode;
+ char *vf_token;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode, _name, _vf_token) \
+FIXTURE_VARIANT_ADD(vfio_pci_sriov_uapi_test, _iommu_mode ## _ ## _name) { \
+ .iommu_mode = #_iommu_mode, \
+ .vf_token = (_vf_token), \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(same_uuid, UUID_1);
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(diff_uuid, UUID_2);
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(null_uuid, NULL);
+
+FIXTURE_SETUP(vfio_pci_sriov_uapi_test)
+{
+ self->iommu = iommu_init(variant->iommu_mode);
+
+ self->pf_token = UUID_1;
+ ASSERT_EQ(device_init(pf_bdf, self->iommu, self->pf_token, &self->pf), 0);
+}
+
+FIXTURE_TEARDOWN(vfio_pci_sriov_uapi_test)
+{
+ device_cleanup(self->vf);
+ device_cleanup(self->pf);
+ iommu_cleanup(self->iommu);
+}
+
+/*
+ * This asserts if the VF device is successfully created if its token matches
+ * with the token used to create/override the PF or fails during a mismatch.
+ */
+#define ASSERT_COND_VF_CREATION(_ret) do { \
+ if (!variant->vf_token || strcmp(self->pf_token, variant->vf_token)) { \
+ ASSERT_NE((_ret), 0); \
+ } else { \
+ ASSERT_EQ((_ret), 0); \
+ } \
+} while (0)
+
+/*
+ * Validate if the UAPI handles correctly and incorrectly set token on the VF.
+ */
+TEST_F(vfio_pci_sriov_uapi_test, init_token_match)
+{
+ int ret;
+
+ ret = device_init(vf_bdf, self->iommu, variant->vf_token, &self->vf);
+ ASSERT_COND_VF_CREATION(ret);
+}
+
+/*
+ * After closing the PF, validate if the VF access still needs the right token.
+ */
+TEST_F(vfio_pci_sriov_uapi_test, pf_early_close)
+{
+ int ret;
+
+ device_cleanup(self->pf);
+
+ /* Clean the 'pf' to avoid calling device_cleanup() again. */
+ self->pf = NULL;
+
+ ret = device_init(vf_bdf, self->iommu, variant->vf_token, &self->vf);
+ ASSERT_COND_VF_CREATION(ret);
+}
+
+/*
+ * After PF device init, override the existing token and validate if the newly
+ * set token is the one that's active.
+ */
+TEST_F(vfio_pci_sriov_uapi_test, override_token)
+{
+ int ret;
+
+ self->pf_token = UUID_2;
+ vfio_device_set_vf_token(self->pf->fd, self->pf_token);
+
+ ret = device_init(vf_bdf, self->iommu, variant->vf_token, &self->vf);
+ ASSERT_COND_VF_CREATION(ret);
+}
+
+static void vf_teardown(void)
+{
+ /*
+ * The child processes, created by TEST_F()s, inherits this atexit()
+ * handler. Hence, check and destroy the VF only when the main/parent
+ * process exits.
+ */
+ if (getpid() != main_pid)
+ return;
+
+ free(vf_bdf);
+ sysfs_sriov_numvfs_set(pf_bdf, 0);
+}
+
+static void vf_setup(void)
+{
+ char *vf_driver;
+ int nr_vfs;
+
+ nr_vfs = sysfs_sriov_totalvfs_get(pf_bdf);
+ if (nr_vfs <= 0)
+ ksft_exit_skip("SR-IOV may not be supported by the PF: %s\n", pf_bdf);
+
+ nr_vfs = sysfs_sriov_numvfs_get(pf_bdf);
+ if (nr_vfs != 0)
+ ksft_exit_skip("SR-IOV already configured for the PF: %s\n", pf_bdf);
+
+ /* Create only one VF for testing */
+ sysfs_sriov_numvfs_set(pf_bdf, 1);
+
+ /*
+ * Setup an exit handler to destroy the VF in case of failures
+ * during further setup at the end of the test run.
+ */
+ main_pid = getpid();
+ VFIO_ASSERT_EQ(atexit(vf_teardown), 0);
+
+ vf_bdf = sysfs_sriov_vf_bdf_get(pf_bdf, 0);
+
+ /*
+ * The VF inherits the driver from the PF.
+ * Ensure this is 'vfio-pci' before proceeding.
+ */
+ vf_driver = sysfs_driver_get(vf_bdf);
+ VFIO_ASSERT_NE(vf_driver, NULL);
+ VFIO_ASSERT_EQ(strcmp(vf_driver, "vfio-pci"), 0);
+ free(vf_driver);
+
+ printf("Created 1 VF (%s) under the PF: %s\n", vf_bdf, pf_bdf);
+}
+
+int main(int argc, char *argv[])
+{
+ pf_bdf = vfio_selftests_get_bdf(&argc, argv);
+ vf_setup();
+
+ return test_harness_run(argc, argv);
+}