summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2026-06-15 03:05:50 +0530
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2026-06-15 03:05:50 +0530
commit: 37c405aeaa5c2cbe04c3c727e3989a16a2e9f30f (patch)
tree: 88594b10c60e16fd03223136a230ed72e42fce62 /tools
parent: 9c9e6bd4cca02f2d183eb260451fb6018f9ee67e (diff)
parent: ee8ab98f831226d69d43ccd93f53c50e6f19b389 (diff)
downloadlwn-37c405aeaa5c2cbe04c3c727e3989a16a2e9f30f.tar.gz
lwn-37c405aeaa5c2cbe04c3c727e3989a16a2e9f30f.zip
Merge tag 'kernel-7.2-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc kernel updates from Christian Brauner:
 "Fixes

   - rhashtable: give each instance its own lockdep class

     syzbot reported a circular locking dependency between ht->mutex and
     fs_reclaim via the simple_xattrs rhashtable being torn down during
     inode eviction. The predicted deadlock cannot occur:
     rhashtable_free_and_destroy() cancels the deferred worker before
     taking ht->mutex and acquisitions on distinct rhashtables are on
     distinct mutexes. Lockdep flags a cycle anyway because every
     ht->mutex in the kernel shared the single static lockdep class from
     rhashtable_init_noprof(). The lockdep key is lifted to a
     per-call-site static key so every rhashtable instance gets its own
     class.

   - selftests/clone3: fix misuse of the libcap library interface in the
     cap_checkpoint_restore test and remove unused variables

   - selftests/pid_namespace: compute the pid_max test limits
     dynamically instead of hardcoding values below the kernel-enforced
     minimum of PIDS_PER_CPU_MIN * num_possible_cpus() which made the
     tests fail on machines with many possible CPUs

   - selftests: fix the Makefile TARGETS entry for nsfs which wasn't
     adjusted when the tests moved under filesystems/

  Cleanups

   - ipc/sem.c: use unsigned int for nsops to match the declaration in
     syscalls.h"

* tag 'kernel-7.2-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  selftests/clone3: remove unused variables
  selftests/clone3: fix libcap interface usage
  ipc/sem.c: use unsigned int for nsops
  selftests: Fix Makefile target for nsfs
  rhashtable: give each instance its own lockdep class
  selftests/pid_namespace: compute pid_max test limits dynamically
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c24
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c156
3 files changed, 130 insertions, 52 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6e59b8f63e41..641a180fb35f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -37,6 +37,7 @@ TARGETS += filesystems/fat
TARGETS += filesystems/overlayfs
TARGETS += filesystems/statmount
TARGETS += filesystems/mount-notify
+TARGETS += filesystems/nsfs
TARGETS += filesystems/fuse
TARGETS += filesystems/move_mount
TARGETS += filesystems/empty_mntns
@@ -85,7 +86,6 @@ TARGETS += net/ppp
TARGETS += net/rds
TARGETS += net/tcp_ao
TARGETS += nolibc
-TARGETS += nsfs
TARGETS += pci_endpoint
TARGETS += pcie_bwctrl
TARGETS += perf_events
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index e82281efa273..ab62bcf4107d 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -53,9 +53,6 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
}
if (pid == 0) {
- int ret;
- char tmp = 0;
-
TH_LOG("I am the child, my PID is %d (expected %d)", getpid(), set_tid[0]);
if (set_tid[0] != getpid())
@@ -87,15 +84,11 @@ static int test_clone3_set_tid(struct __test_metadata *_metadata,
return ret;
}
-struct libcap {
- struct __user_cap_header_struct hdr;
- struct __user_cap_data_struct data[2];
-};
-
static int set_capability(void)
{
- cap_value_t cap_values[] = { CAP_SETUID, CAP_SETGID };
- struct libcap *cap;
+ cap_value_t cap_values[] = {
+ CAP_SETUID, CAP_SETGID, CAP_CHECKPOINT_RESTORE
+ };
int ret = -1;
cap_t caps;
@@ -111,14 +104,8 @@ static int set_capability(void)
goto out;
}
- cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_values, CAP_SET);
- cap_set_flag(caps, CAP_PERMITTED, 2, cap_values, CAP_SET);
-
- cap = (struct libcap *) caps;
-
- /* 40 -> CAP_CHECKPOINT_RESTORE */
- cap->data[1].effective |= 1 << (40 - 32);
- cap->data[1].permitted |= 1 << (40 - 32);
+ cap_set_flag(caps, CAP_EFFECTIVE, 3, cap_values, CAP_SET);
+ cap_set_flag(caps, CAP_PERMITTED, 3, cap_values, CAP_SET);
if (cap_set_proc(caps)) {
perror("cap_set_proc");
@@ -135,7 +122,6 @@ TEST(clone3_cap_checkpoint_restore)
{
pid_t pid;
int status;
- int ret = 0;
pid_t set_tid[1];
test_clone3_supported();
diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index c9519e7385b6..5d686a09aa15 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -12,10 +12,74 @@
#include <syscall.h>
#include <sys/mount.h>
#include <sys/wait.h>
+#include <unistd.h>
#include "kselftest_harness.h"
#include "../pidfd/pidfd.h"
+/*
+ * The kernel computes the minimum allowed pid_max as:
+ * max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
+ * Mirror that here so the test values are always valid.
+ *
+ * Note: glibc's get_nprocs_conf() returns the number of *configured*
+ * (present) CPUs, not *possible* CPUs. The kernel uses
+ * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
+ * These can differ significantly (e.g. 16 configured vs 128 possible).
+ */
+#define RESERVED_PIDS 300
+#define PIDS_PER_CPU_MIN 8
+
+/*
+ * Count the CPUs listed in /sys/devices/system/cpu/possible.
+ *
+ * The file holds a comma-separated list of single CPU ids and inclusive
+ * ranges, e.g. "0-31" or "0-15,32-47".
+ *
+ * Returns the number of CPUs described by the list. If the file cannot be
+ * opened or yields no valid entry, falls back to
+ * sysconf(_SC_NPROCESSORS_CONF) as a best-effort estimate (it may
+ * undercount the possible CPUs); returns 0 only if that fails as well.
+ */
+static int num_possible_cpus(void)
+{
+	FILE *f;
+	int count = 0;
+	int lo, hi;
+
+	f = fopen("/sys/devices/system/cpu/possible", "r");
+	if (f) {
+		while (fscanf(f, "%d", &lo) == 1) {
+			/* A "-<n>" suffix turns the id into an inclusive range. */
+			if (fscanf(f, "-%d", &hi) != 1)
+				hi = lo;
+			/* Ignore malformed entries rather than miscounting. */
+			if (lo >= 0 && hi >= lo)
+				count += hi - lo + 1;
+			/* Anything but a comma separator ends the list. */
+			if (fgetc(f) != ',')
+				break;
+		}
+		fclose(f);
+	}
+
+	if (count <= 0) {
+		long nconf = sysconf(_SC_NPROCESSORS_CONF);
+
+		count = nconf > 0 ? (int)nconf : 0;
+	}
+
+	return count;
+}
+
+/*
+ * Lowest pid_max value the tests may use: the larger of RESERVED_PIDS + 1
+ * and PIDS_PER_CPU_MIN multiplied by the number of possible CPUs.
+ */
+static int pid_min(void)
+{
+	const int reserved_floor = RESERVED_PIDS + 1;
+	const int per_cpu_floor = PIDS_PER_CPU_MIN * num_possible_cpus();
+
+	if (per_cpu_floor > reserved_floor)
+		return per_cpu_floor;
+	return reserved_floor;
+}
+
+/*
+ * Outer and inner pid_max limits used by the tests. The outer limit is
+ * the more restrictive ancestor; the inner limit is set higher in a
+ * nested namespace but must still be capped by the outer limit.
+ * Both are derived from the kernel's minimum so they are always writable.
+ *
+ * Global so that clone callbacks can access them without parameter plumbing.
+ */
+static int outer_limit;
+static int inner_limit;
+
+/*
+ * Write the decimal representation of @val to @fd.
+ *
+ * Returns the number of bytes written on success and -1 on failure. A
+ * short write is treated as a failure so that callers checking "ret < 0"
+ * never continue with a truncated value; errno is only meaningful when
+ * write() itself failed.
+ */
+static int write_int_to_fd(int fd, int val)
+{
+	char buf[12]; /* sign + 10 digits + NUL covers any 32-bit int */
+	int len = snprintf(buf, sizeof(buf), "%d", val);
+	ssize_t ret;
+
+	if (len < 0 || (size_t)len >= sizeof(buf))
+		return -1;
+
+	ret = write(fd, buf, len);
+	if (ret != len)
+		return -1;
+
+	return len;
+}
+
#define __STACK_SIZE (8 * 1024 * 1024)
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
@@ -60,18 +124,18 @@ static int pid_max_cb(void *data)
return -1;
}
- ret = write(fd, "500", sizeof("500") - 1);
+ ret = write_int_to_fd(fd, inner_limit);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
return -1;
}
- for (int i = 0; i < 501; i++) {
+ for (int i = 0; i < inner_limit + 1; i++) {
pid = fork();
if (pid == 0)
exit(EXIT_SUCCESS);
wait_for_pid(pid);
- if (pid > 500) {
+ if (pid > inner_limit) {
fprintf(stderr, "Managed to create pid number beyond limit\n");
return -1;
}
@@ -106,7 +170,7 @@ static int pid_max_nested_inner(void *data)
return fret;
}
- ret = write(fd, "500", sizeof("500") - 1);
+ ret = write_int_to_fd(fd, inner_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -133,8 +197,8 @@ static int pid_max_nested_inner(void *data)
return fret;
}
- /* Now make sure that we wrap pids at 400. */
- for (i = 0; i < 510; i++) {
+ /* Now make sure that we wrap pids at outer_limit. */
+ for (i = 0; i < inner_limit + 10; i++) {
pid_t pid;
pid = fork();
@@ -145,7 +209,7 @@ static int pid_max_nested_inner(void *data)
exit(EXIT_SUCCESS);
wait_for_pid(pid);
- if (pid >= 500) {
+ if (pid >= inner_limit) {
fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
return fret;
}
@@ -156,15 +220,19 @@ static int pid_max_nested_inner(void *data)
static int pid_max_nested_outer(void *data)
{
- int fret = -1, nr_procs = 400;
- pid_t pids[1000];
- int fd, i, ret;
+ int fret = -1, nr_procs = 0;
+ pid_t *pids;
+ int fd, ret;
pid_t pid;
+ pids = malloc(outer_limit * sizeof(pid_t));
+ if (!pids)
+ return -1;
+
ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
if (ret) {
fprintf(stderr, "%m - Failed to make rootfs private mount\n");
- return fret;
+ goto out;
}
umount2("/proc", MNT_DETACH);
@@ -172,27 +240,28 @@ static int pid_max_nested_outer(void *data)
ret = mount("proc", "/proc", "proc", 0, NULL);
if (ret) {
fprintf(stderr, "%m - Failed to mount proc\n");
- return fret;
+ goto out;
}
fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
if (fd < 0) {
fprintf(stderr, "%m - Failed to open pid_max\n");
- return fret;
+ goto out;
}
- ret = write(fd, "400", sizeof("400") - 1);
+ ret = write_int_to_fd(fd, outer_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
- return fret;
+ goto out;
}
/*
- * Create 397 processes. This leaves room for do_clone() (398) and
- * one more 399. So creating another process needs to fail.
+ * Create (outer_limit - 4) processes. This leaves room for
+ * do_clone() and one more. So creating another process needs
+ * to fail.
*/
- for (nr_procs = 0; nr_procs < 396; nr_procs++) {
+ for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) {
pid = fork();
if (pid < 0)
goto reap;
@@ -220,20 +289,26 @@ reap:
for (int i = 0; i < nr_procs; i++)
wait_for_pid(pids[i]);
+out:
+ free(pids);
return fret;
}
static int pid_max_nested_limit_inner(void *data)
{
- int fret = -1, nr_procs = 400;
+ int fret = -1, nr_procs = 0;
int fd, ret;
pid_t pid;
- pid_t pids[1000];
+ pid_t *pids;
+
+ pids = malloc(inner_limit * sizeof(pid_t));
+ if (!pids)
+ return -1;
ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
if (ret) {
fprintf(stderr, "%m - Failed to make rootfs private mount\n");
- return fret;
+ goto out;
}
umount2("/proc", MNT_DETACH);
@@ -241,23 +316,23 @@ static int pid_max_nested_limit_inner(void *data)
ret = mount("proc", "/proc", "proc", 0, NULL);
if (ret) {
fprintf(stderr, "%m - Failed to mount proc\n");
- return fret;
+ goto out;
}
fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
if (fd < 0) {
fprintf(stderr, "%m - Failed to open pid_max\n");
- return fret;
+ goto out;
}
- ret = write(fd, "500", sizeof("500") - 1);
+ ret = write_int_to_fd(fd, inner_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
- return fret;
+ goto out;
}
- for (nr_procs = 0; nr_procs < 500; nr_procs++) {
+ for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) {
pid = fork();
if (pid < 0)
break;
@@ -268,7 +343,7 @@ static int pid_max_nested_limit_inner(void *data)
pids[nr_procs] = pid;
}
- if (nr_procs >= 400) {
+ if (nr_procs >= outer_limit) {
fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
goto reap;
}
@@ -279,6 +354,8 @@ reap:
for (int i = 0; i < nr_procs; i++)
wait_for_pid(pids[i]);
+out:
+ free(pids);
return fret;
}
@@ -307,7 +384,7 @@ static int pid_max_nested_limit_outer(void *data)
return -1;
}
- ret = write(fd, "400", sizeof("400") - 1);
+ ret = write_int_to_fd(fd, outer_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -328,17 +405,32 @@ static int pid_max_nested_limit_outer(void *data)
return 0;
}
-TEST(pid_max_simple)
+FIXTURE(pid_max) {
+ int dummy;
+};
+
+FIXTURE_SETUP(pid_max)
{
- pid_t pid;
+ int min = pid_min();
+ outer_limit = min + 100;
+ inner_limit = min + 200;
+}
+
+FIXTURE_TEARDOWN(pid_max)
+{
+}
+
+TEST_F(pid_max, simple)
+{
+ pid_t pid;
pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
ASSERT_GT(pid, 0);
ASSERT_EQ(0, wait_for_pid(pid));
}
-TEST(pid_max_nested_limit)
+TEST_F(pid_max, nested_limit)
{
pid_t pid;
@@ -347,7 +439,7 @@ TEST(pid_max_nested_limit)
ASSERT_EQ(0, wait_for_pid(pid));
}
-TEST(pid_max_nested)
+TEST_F(pid_max, nested)
{
pid_t pid;