summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-06-03 15:54:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-06-03 15:54:57 -0700
commit1888e9b4bb78c88514b24ecafa9e4e4faf761747 (patch)
treebe6e177776100fd835059c5afb91096458af5405 /ipc
parent07953c54a10567e484cefd8f8c782025dc68b3b1 (diff)
parent38cd5b12b7854941ede1954cf5a2393eb94b5d37 (diff)
downloadlwn-1888e9b4bb78c88514b24ecafa9e4e4faf761747.tar.gz
lwn-1888e9b4bb78c88514b24ecafa9e4e4faf761747.zip
Merge tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull ipc sysctl namespace updates from Eric Biederman: "This updates the ipc sysctls so that they are fundamentally per ipc namespace. Previously these sysctls depended upon a hack to simulate being per ipc namespace by looking up the ipc namespace in read or write. With this set of changes the ipc sysctls are registered per ipc namespace and open looks up the ipc namespace. Not only does this series of changes ensure the traditional binding at open time happens, but it sets a foundation for being able to relax the permission checks to allow a user namspace root to change the ipc sysctls for an ipc namespace that the user namespace root requires. To do this requires the ipc namespace to be known at open time" * tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: ipc: Remove extra braces ipc: Check permissions for checkpoint_restart sysctls at open time ipc: Remove extra1 field abuse to pass ipc namespace ipc: Use the same namespace to modify and validate ipc: Store ipc sysctls in the ipc namespace ipc: Store mqueue sysctls in the ipc namespace
Diffstat (limited to 'ipc')
-rw-r--r--ipc/ipc_sysctl.c205
-rw-r--r--ipc/mq_sysctl.c121
-rw-r--r--ipc/mqueue.c10
-rw-r--r--ipc/namespace.c10
4 files changed, 205 insertions, 141 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index f101c171753f..ef313ecfb53a 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -13,43 +13,17 @@
#include <linux/capability.h>
#include <linux/ipc_namespace.h>
#include <linux/msg.h>
+#include <linux/slab.h>
#include "util.h"
-static void *get_ipc(struct ctl_table *table)
-{
- char *which = table->data;
- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
- return which;
-}
-
-static int proc_ipc_dointvec(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct ctl_table ipc_table;
-
- memcpy(&ipc_table, table, sizeof(ipc_table));
- ipc_table.data = get_ipc(table);
-
- return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
-}
-
-static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct ctl_table ipc_table;
-
- memcpy(&ipc_table, table, sizeof(ipc_table));
- ipc_table.data = get_ipc(table);
-
- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
-}
-
static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct ipc_namespace *ns = current->nsproxy->ipc_ns;
- int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ struct ipc_namespace *ns =
+ container_of(table->data, struct ipc_namespace, shm_rmid_forced);
+ int err;
+
+ err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (err < 0)
return err;
@@ -58,17 +32,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
return err;
}
-static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct ctl_table ipc_table;
- memcpy(&ipc_table, table, sizeof(ipc_table));
- ipc_table.data = get_ipc(table);
-
- return proc_doulongvec_minmax(&ipc_table, write, buffer,
- lenp, ppos);
-}
-
static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -87,14 +50,15 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
+ struct ipc_namespace *ns =
+ container_of(table->data, struct ipc_namespace, sem_ctls);
int ret, semmni;
- struct ipc_namespace *ns = current->nsproxy->ipc_ns;
semmni = ns->sem_ctls[3];
- ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret)
- ret = sem_check_semmni(current->nsproxy->ipc_ns);
+ ret = sem_check_semmni(ns);
/*
* Reset the semmni value if an error happens.
@@ -104,44 +68,31 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
return ret;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
-static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
- int write, void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns;
-
- if (write && !checkpoint_restore_ns_capable(user_ns))
- return -EPERM;
-
- return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-#endif
-
int ipc_mni = IPCMNI;
int ipc_mni_shift = IPCMNI_SHIFT;
int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
-static struct ctl_table ipc_kern_table[] = {
+static struct ctl_table ipc_sysctls[] = {
{
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof(init_ipc_ns.shm_ctlmax),
.mode = 0644,
- .proc_handler = proc_ipc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof(init_ipc_ns.shm_ctlall),
.mode = 0644,
- .proc_handler = proc_ipc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof(init_ipc_ns.shm_ctlmni),
.mode = 0644,
- .proc_handler = proc_ipc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
},
@@ -159,7 +110,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof(init_ipc_ns.msg_ctlmax),
.mode = 0644,
- .proc_handler = proc_ipc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
@@ -168,7 +119,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof(init_ipc_ns.msg_ctlmni),
.mode = 0644,
- .proc_handler = proc_ipc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
},
@@ -186,7 +137,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof(init_ipc_ns.msg_ctlmnb),
.mode = 0644,
- .proc_handler = proc_ipc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
@@ -202,8 +153,8 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "sem_next_id",
.data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
- .mode = 0666,
- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
+ .mode = 0444,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
@@ -211,8 +162,8 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "msg_next_id",
.data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
- .mode = 0666,
- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
+ .mode = 0444,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
@@ -220,8 +171,8 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "shm_next_id",
.data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
- .mode = 0666,
- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
+ .mode = 0444,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
@@ -229,18 +180,112 @@ static struct ctl_table ipc_kern_table[] = {
{}
};
-static struct ctl_table ipc_root_table[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = ipc_kern_table,
- },
- {}
+static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
+{
+ return &current->nsproxy->ipc_ns->ipc_set;
+}
+
+static int set_is_seen(struct ctl_table_set *set)
+{
+ return &current->nsproxy->ipc_ns->ipc_set == set;
+}
+
+static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
+{
+ int mode = table->mode;
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+
+ if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
+ (table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
+ (table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
+ checkpoint_restore_ns_capable(ns->user_ns))
+ mode = 0666;
+#endif
+ return mode;
+}
+
+static struct ctl_table_root set_root = {
+ .lookup = set_lookup,
+ .permissions = ipc_permissions,
};
+bool setup_ipc_sysctls(struct ipc_namespace *ns)
+{
+ struct ctl_table *tbl;
+
+ setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen);
+
+ tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL);
+ if (tbl) {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) {
+ if (tbl[i].data == &init_ipc_ns.shm_ctlmax)
+ tbl[i].data = &ns->shm_ctlmax;
+
+ else if (tbl[i].data == &init_ipc_ns.shm_ctlall)
+ tbl[i].data = &ns->shm_ctlall;
+
+ else if (tbl[i].data == &init_ipc_ns.shm_ctlmni)
+ tbl[i].data = &ns->shm_ctlmni;
+
+ else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced)
+ tbl[i].data = &ns->shm_rmid_forced;
+
+ else if (tbl[i].data == &init_ipc_ns.msg_ctlmax)
+ tbl[i].data = &ns->msg_ctlmax;
+
+ else if (tbl[i].data == &init_ipc_ns.msg_ctlmni)
+ tbl[i].data = &ns->msg_ctlmni;
+
+ else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb)
+ tbl[i].data = &ns->msg_ctlmnb;
+
+ else if (tbl[i].data == &init_ipc_ns.sem_ctls)
+ tbl[i].data = &ns->sem_ctls;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id)
+ tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;
+
+ else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id)
+ tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;
+
+ else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id)
+ tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
+#endif
+ else
+ tbl[i].data = NULL;
+ }
+
+ ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl);
+ }
+ if (!ns->ipc_sysctls) {
+ kfree(tbl);
+ retire_sysctl_set(&ns->ipc_set);
+ return false;
+ }
+
+ return true;
+}
+
+void retire_ipc_sysctls(struct ipc_namespace *ns)
+{
+ struct ctl_table *tbl;
+
+ tbl = ns->ipc_sysctls->ctl_table_arg;
+ unregister_sysctl_table(ns->ipc_sysctls);
+ retire_sysctl_set(&ns->ipc_set);
+ kfree(tbl);
+}
+
static int __init ipc_sysctl_init(void)
{
- register_sysctl_table(ipc_root_table);
+ if (!setup_ipc_sysctls(&init_ipc_ns)) {
+ pr_warn("ipc sysctl registration failed\n");
+ return -ENOMEM;
+ }
return 0;
}
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 72a92a08c848..fbf6a8b93a26 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -9,39 +9,9 @@
#include <linux/ipc_namespace.h>
#include <linux/sysctl.h>
-#ifdef CONFIG_PROC_SYSCTL
-static void *get_mq(struct ctl_table *table)
-{
- char *which = table->data;
- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
- return which;
-}
-
-static int proc_mq_dointvec(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct ctl_table mq_table;
- memcpy(&mq_table, table, sizeof(mq_table));
- mq_table.data = get_mq(table);
-
- return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
-}
-
-static int proc_mq_dointvec_minmax(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct ctl_table mq_table;
- memcpy(&mq_table, table, sizeof(mq_table));
- mq_table.data = get_mq(table);
-
- return proc_dointvec_minmax(&mq_table, write, buffer,
- lenp, ppos);
-}
-#else
-#define proc_mq_dointvec NULL
-#define proc_mq_dointvec_minmax NULL
-#endif
+#include <linux/stat.h>
+#include <linux/capability.h>
+#include <linux/slab.h>
static int msg_max_limit_min = MIN_MSGMAX;
static int msg_max_limit_max = HARD_MSGMAX;
@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_queues_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_mq_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.procname = "msg_max",
.data = &init_ipc_ns.mq_msg_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_mq_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &msg_max_limit_min,
.extra2 = &msg_max_limit_max,
},
@@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_msgsize_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_mq_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_msg_default,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_mq_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &msg_max_limit_min,
.extra2 = &msg_max_limit_max,
},
@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_msgsize_default,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_mq_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
{}
};
-static struct ctl_table mq_sysctl_dir[] = {
- {
- .procname = "mqueue",
- .mode = 0555,
- .child = mq_sysctls,
- },
- {}
-};
+static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
+{
+ return &current->nsproxy->ipc_ns->mq_set;
+}
-static struct ctl_table mq_sysctl_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = mq_sysctl_dir,
- },
- {}
+static int set_is_seen(struct ctl_table_set *set)
+{
+ return &current->nsproxy->ipc_ns->mq_set == set;
+}
+
+static struct ctl_table_root set_root = {
+ .lookup = set_lookup,
};
-struct ctl_table_header *mq_register_sysctl_table(void)
+bool setup_mq_sysctls(struct ipc_namespace *ns)
{
- return register_sysctl_table(mq_sysctl_root);
+ struct ctl_table *tbl;
+
+ setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen);
+
+ tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL);
+ if (tbl) {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) {
+ if (tbl[i].data == &init_ipc_ns.mq_queues_max)
+ tbl[i].data = &ns->mq_queues_max;
+
+ else if (tbl[i].data == &init_ipc_ns.mq_msg_max)
+ tbl[i].data = &ns->mq_msg_max;
+
+ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max)
+ tbl[i].data = &ns->mq_msgsize_max;
+
+ else if (tbl[i].data == &init_ipc_ns.mq_msg_default)
+ tbl[i].data = &ns->mq_msg_default;
+
+ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default)
+ tbl[i].data = &ns->mq_msgsize_default;
+ else
+ tbl[i].data = NULL;
+ }
+
+ ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl);
+ }
+ if (!ns->mq_sysctls) {
+ kfree(tbl);
+ retire_sysctl_set(&ns->mq_set);
+ return false;
+ }
+
+ return true;
+}
+
+void retire_mq_sysctls(struct ipc_namespace *ns)
+{
+ struct ctl_table *tbl;
+
+ tbl = ns->mq_sysctls->ctl_table_arg;
+ unregister_sysctl_table(ns->mq_sysctls);
+ retire_sysctl_set(&ns->mq_set);
+ kfree(tbl);
}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 54cb6264f8cf..12ad7860bb88 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info);
static struct kmem_cache *mqueue_inode_cachep;
-static struct ctl_table_header *mq_sysctl_table;
-
static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{
return container_of(inode, struct mqueue_inode_info, vfs_inode);
@@ -1727,8 +1725,10 @@ static int __init init_mqueue_fs(void)
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
- /* ignore failures - they are not fatal */
- mq_sysctl_table = mq_register_sysctl_table();
+ if (!setup_mq_sysctls(&init_ipc_ns)) {
+ pr_warn("sysctl registration failed\n");
+ return -ENOMEM;
+ }
error = register_filesystem(&mqueue_fs_type);
if (error)
@@ -1745,8 +1745,6 @@ static int __init init_mqueue_fs(void)
out_filesystem:
unregister_filesystem(&mqueue_fs_type);
out_sysctl:
- if (mq_sysctl_table)
- unregister_sysctl_table(mq_sysctl_table);
kmem_cache_destroy(mqueue_inode_cachep);
return error;
}
diff --git a/ipc/namespace.c b/ipc/namespace.c
index ae83f0f2651b..754f3237194a 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -59,6 +59,13 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (err)
goto fail_put;
+ err = -ENOMEM;
+ if (!setup_mq_sysctls(ns))
+ goto fail_put;
+
+ if (!setup_ipc_sysctls(ns))
+ goto fail_put;
+
sem_init_ns(ns);
msg_init_ns(ns);
shm_init_ns(ns);
@@ -125,6 +132,9 @@ static void free_ipc_ns(struct ipc_namespace *ns)
msg_exit_ns(ns);
shm_exit_ns(ns);
+ retire_mq_sysctls(ns);
+ retire_ipc_sysctls(ns);
+
dec_ipc_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);