diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-15 03:05:50 +0530 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-15 03:05:50 +0530 |
| commit | 37c405aeaa5c2cbe04c3c727e3989a16a2e9f30f (patch) | |
| tree | 88594b10c60e16fd03223136a230ed72e42fce62 /tools | |
| parent | 9c9e6bd4cca02f2d183eb260451fb6018f9ee67e (diff) | |
| parent | ee8ab98f831226d69d43ccd93f53c50e6f19b389 (diff) | |
| download | lwn-37c405aeaa5c2cbe04c3c727e3989a16a2e9f30f.tar.gz lwn-37c405aeaa5c2cbe04c3c727e3989a16a2e9f30f.zip | |
Merge tag 'kernel-7.2-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc kernel updates from Christian Brauner:
"Fixes
- rhashtable: give each instance its own lockdep class
syzbot reported a circular locking dependency between ht->mutex and
fs_reclaim via the simple_xattrs rhashtable being torn down during
inode eviction.
The predicted deadlock cannot occur: rhashtable_free_and_destroy()
cancels the deferred worker before taking ht->mutex and
acquisitions on distinct rhashtables are on distinct mutexes.
Lockdep flags a cycle anyway because every ht->mutex in the kernel
shared the single static lockdep class from
rhashtable_init_noprof().
The lockdep key is lifted to a per-call-site static key so every
rhashtable instance gets its own class.
- selftests/clone3: fix misuse of the libcap library interface in the
cap_checkpoint_restore test and remove unused variables
- selftests/pid_namespace: compute the pid_max test limits
dynamically instead of hardcoding values below the kernel-enforced
minimum of PIDS_PER_CPU_MIN * num_possible_cpus() which made the
tests fail on machines with many possible CPUs
- selftests: fix the Makefile TARGETS entry for nsfs which wasn't
adjusted when the tests moved under filesystems/
Cleanups
- ipc/sem.c: use unsigned int for nsops to match the declaration in
syscalls.h"
* tag 'kernel-7.2-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
selftests/clone3: remove unused variables
selftests/clone3: fix libcap interface usage
ipc/sem.c: use unsigned int for nsops
selftests: Fix Makefile target for nsfs
rhashtable: give each instance its own lockdep class
selftests/pid_namespace: compute pid_max test limits dynamically
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/testing/selftests/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 24 | ||||
| -rw-r--r-- | tools/testing/selftests/pid_namespace/pid_max.c | 156 |
3 files changed, 130 insertions, 52 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6e59b8f63e41..641a180fb35f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -37,6 +37,7 @@ TARGETS += filesystems/fat TARGETS += filesystems/overlayfs TARGETS += filesystems/statmount TARGETS += filesystems/mount-notify +TARGETS += filesystems/nsfs TARGETS += filesystems/fuse TARGETS += filesystems/move_mount TARGETS += filesystems/empty_mntns @@ -85,7 +86,6 @@ TARGETS += net/ppp TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nolibc -TARGETS += nsfs TARGETS += pci_endpoint TARGETS += pcie_bwctrl TARGETS += perf_events diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index e82281efa273..ab62bcf4107d 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -53,9 +53,6 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata, } if (pid == 0) { - int ret; - char tmp = 0; - TH_LOG("I am the child, my PID is %d (expected %d)", getpid(), set_tid[0]); if (set_tid[0] != getpid()) @@ -87,15 +84,11 @@ static int test_clone3_set_tid(struct __test_metadata *_metadata, return ret; } -struct libcap { - struct __user_cap_header_struct hdr; - struct __user_cap_data_struct data[2]; -}; - static int set_capability(void) { - cap_value_t cap_values[] = { CAP_SETUID, CAP_SETGID }; - struct libcap *cap; + cap_value_t cap_values[] = { + CAP_SETUID, CAP_SETGID, CAP_CHECKPOINT_RESTORE + }; int ret = -1; cap_t caps; @@ -111,14 +104,8 @@ static int set_capability(void) goto out; } - cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_values, CAP_SET); - cap_set_flag(caps, CAP_PERMITTED, 2, cap_values, CAP_SET); - - cap = (struct libcap *) caps; - - /* 40 -> CAP_CHECKPOINT_RESTORE */ - cap->data[1].effective |= 1 << (40 - 32); - cap->data[1].permitted |= 1 << (40 - 32); + cap_set_flag(caps, CAP_EFFECTIVE, 3, cap_values, CAP_SET); + cap_set_flag(caps, CAP_PERMITTED, 3, cap_values, CAP_SET); if (cap_set_proc(caps)) { perror("cap_set_proc"); @@ -135,7 +122,6 @@ TEST(clone3_cap_checkpoint_restore) { pid_t pid; int status; - int ret = 0; pid_t set_tid[1]; test_clone3_supported(); diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c index c9519e7385b6..5d686a09aa15 100644 --- a/tools/testing/selftests/pid_namespace/pid_max.c +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -12,10 +12,74 @@ #include <syscall.h> #include <sys/mount.h> #include <sys/wait.h> +#include <unistd.h> #include "kselftest_harness.h" #include "../pidfd/pidfd.h" +/* + * The kernel computes the minimum allowed pid_max as: + * max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus()) + * Mirror that here so the test values are always valid. + * + * Note: glibc's get_nprocs_conf() returns the number of *configured* + * (present) CPUs, not *possible* CPUs. The kernel uses + * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible. + * These can differ significantly (e.g. 16 configured vs 128 possible). + */ +#define RESERVED_PIDS 300 +#define PIDS_PER_CPU_MIN 8 + +/* Count CPUs from a range list like "0-31" or "0-15,32-47". */ +static int num_possible_cpus(void) +{ + FILE *f; + int count = 0; + int lo, hi; + + f = fopen("/sys/devices/system/cpu/possible", "r"); + if (!f) + return 0; + + while (fscanf(f, "%d", &lo) == 1) { + if (fscanf(f, "-%d", &hi) == 1) + count += hi - lo + 1; + else + count++; + /* skip comma separator */ + fscanf(f, ","); + } + + fclose(f); + return count; +} + +static int pid_min(void) +{ + int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus(); + + return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1); +} + +/* + * Outer and inner pid_max limits used by the tests. The outer limit is + * the more restrictive ancestor; the inner limit is set higher in a + * nested namespace but must still be capped by the outer limit. + * Both are derived from the kernel's minimum so they are always writable. + * + * Global so that clone callbacks can access them without parameter plumbing. + */ +static int outer_limit; +static int inner_limit; + +static int write_int_to_fd(int fd, int val) +{ + char buf[12]; + int len = snprintf(buf, sizeof(buf), "%d", val); + + return write(fd, buf, len); +} + #define __STACK_SIZE (8 * 1024 * 1024) static pid_t do_clone(int (*fn)(void *), void *arg, int flags) { @@ -60,18 +124,18 @@ static int pid_max_cb(void *data) return -1; } - ret = write(fd, "500", sizeof("500") - 1); + ret = write_int_to_fd(fd, inner_limit); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); return -1; } - for (int i = 0; i < 501; i++) { + for (int i = 0; i < inner_limit + 1; i++) { pid = fork(); if (pid == 0) exit(EXIT_SUCCESS); wait_for_pid(pid); - if (pid > 500) { + if (pid > inner_limit) { fprintf(stderr, "Managed to create pid number beyond limit\n"); return -1; } @@ -106,7 +170,7 @@ static int pid_max_nested_inner(void *data) return fret; } - ret = write(fd, "500", sizeof("500") - 1); + ret = write_int_to_fd(fd, inner_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); @@ -133,8 +197,8 @@ static int pid_max_nested_inner(void *data) return fret; } - /* Now make sure that we wrap pids at 400. */ - for (i = 0; i < 510; i++) { + /* Now make sure that we wrap pids at outer_limit. */ + for (i = 0; i < inner_limit + 10; i++) { pid_t pid; pid = fork(); @@ -145,7 +209,7 @@ static int pid_max_nested_inner(void *data) exit(EXIT_SUCCESS); wait_for_pid(pid); - if (pid >= 500) { + if (pid >= inner_limit) { fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid); return fret; } @@ -156,15 +220,19 @@ static int pid_max_nested_inner(void *data) static int pid_max_nested_outer(void *data) { - int fret = -1, nr_procs = 400; - pid_t pids[1000]; - int fd, i, ret; + int fret = -1, nr_procs = 0; + pid_t *pids; + int fd, ret; pid_t pid; + pids = malloc(outer_limit * sizeof(pid_t)); + if (!pids) + return -1; + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); if (ret) { fprintf(stderr, "%m - Failed to make rootfs private mount\n"); - return fret; + goto out; } umount2("/proc", MNT_DETACH); @@ -172,27 +240,28 @@ static int pid_max_nested_outer(void *data) ret = mount("proc", "/proc", "proc", 0, NULL); if (ret) { fprintf(stderr, "%m - Failed to mount proc\n"); - return fret; + goto out; } fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); if (fd < 0) { fprintf(stderr, "%m - Failed to open pid_max\n"); - return fret; + goto out; } - ret = write(fd, "400", sizeof("400") - 1); + ret = write_int_to_fd(fd, outer_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); - return fret; + goto out; } /* - * Create 397 processes. This leaves room for do_clone() (398) and - * one more 399. So creating another process needs to fail. + * Create (outer_limit - 4) processes. This leaves room for + * do_clone() and one more. So creating another process needs + * to fail. */ - for (nr_procs = 0; nr_procs < 396; nr_procs++) { + for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) { pid = fork(); if (pid < 0) goto reap; @@ -220,20 +289,26 @@ reap: for (int i = 0; i < nr_procs; i++) wait_for_pid(pids[i]); +out: + free(pids); return fret; } static int pid_max_nested_limit_inner(void *data) { - int fret = -1, nr_procs = 400; + int fret = -1, nr_procs = 0; int fd, ret; pid_t pid; - pid_t pids[1000]; + pid_t *pids; + + pids = malloc(inner_limit * sizeof(pid_t)); + if (!pids) + return -1; ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); if (ret) { fprintf(stderr, "%m - Failed to make rootfs private mount\n"); - return fret; + goto out; } umount2("/proc", MNT_DETACH); @@ -241,23 +316,23 @@ static int pid_max_nested_limit_inner(void *data) ret = mount("proc", "/proc", "proc", 0, NULL); if (ret) { fprintf(stderr, "%m - Failed to mount proc\n"); - return fret; + goto out; } fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); if (fd < 0) { fprintf(stderr, "%m - Failed to open pid_max\n"); - return fret; + goto out; } - ret = write(fd, "500", sizeof("500") - 1); + ret = write_int_to_fd(fd, inner_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); - return fret; + goto out; } - for (nr_procs = 0; nr_procs < 500; nr_procs++) { + for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) { pid = fork(); if (pid < 0) break; @@ -268,7 +343,7 @@ static int pid_max_nested_limit_inner(void *data) pids[nr_procs] = pid; } - if (nr_procs >= 400) { + if (nr_procs >= outer_limit) { fprintf(stderr, "Managed to create processes beyond the configured outer limit\n"); goto reap; } @@ -279,6 +354,8 @@ reap: for (int i = 0; i < nr_procs; i++) wait_for_pid(pids[i]); +out: + free(pids); return fret; } @@ -307,7 +384,7 @@ static int pid_max_nested_limit_outer(void *data) return -1; } - ret = write(fd, "400", sizeof("400") - 1); + ret = write_int_to_fd(fd, outer_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); @@ -328,17 +405,32 @@ static int pid_max_nested_limit_outer(void *data) return 0; } -TEST(pid_max_simple) +FIXTURE(pid_max) { + int dummy; +}; + +FIXTURE_SETUP(pid_max) { - pid_t pid; + int min = pid_min(); + outer_limit = min + 100; + inner_limit = min + 200; +} + +FIXTURE_TEARDOWN(pid_max) +{ +} + +TEST_F(pid_max, simple) +{ + pid_t pid; pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS); ASSERT_GT(pid, 0); ASSERT_EQ(0, wait_for_pid(pid)); } -TEST(pid_max_nested_limit) +TEST_F(pid_max, nested_limit) { pid_t pid; @@ -347,7 +439,7 @@ TEST(pid_max_nested_limit) ASSERT_EQ(0, wait_for_pid(pid)); } -TEST(pid_max_nested) +TEST_F(pid_max, nested) { pid_t pid; |
