diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-06-03 15:54:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-06-03 15:54:57 -0700 |
commit | 1888e9b4bb78c88514b24ecafa9e4e4faf761747 (patch) | |
tree | be6e177776100fd835059c5afb91096458af5405 /ipc/ipc_sysctl.c | |
parent | 07953c54a10567e484cefd8f8c782025dc68b3b1 (diff) | |
parent | 38cd5b12b7854941ede1954cf5a2393eb94b5d37 (diff) | |
download | lwn-1888e9b4bb78c88514b24ecafa9e4e4faf761747.tar.gz lwn-1888e9b4bb78c88514b24ecafa9e4e4faf761747.zip |
Merge tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull ipc sysctl namespace updates from Eric Biederman:
"This updates the ipc sysctls so that they are fundamentally per ipc
namespace. Previously these sysctls depended upon a hack to simulate
being per ipc namespace by looking up the ipc namespace in read or
write. With this set of changes the ipc sysctls are registered per ipc
namespace and open looks up the ipc namespace.
Not only does this series of changes ensure the traditional binding at
open time happens, but it sets a foundation for being able to relax
the permission checks to allow a user namespace root to change the ipc
sysctls for an ipc namespace that the user namespace root requires. To
do this requires the ipc namespace to be known at open time"
* tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
ipc: Remove extra braces
ipc: Check permissions for checkpoint_restart sysctls at open time
ipc: Remove extra1 field abuse to pass ipc namespace
ipc: Use the same namespace to modify and validate
ipc: Store ipc sysctls in the ipc namespace
ipc: Store mqueue sysctls in the ipc namespace
Diffstat (limited to 'ipc/ipc_sysctl.c')
-rw-r--r-- | ipc/ipc_sysctl.c | 205 |
1 files changed, 125 insertions, 80 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index f101c171753f..ef313ecfb53a 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -13,43 +13,17 @@ #include <linux/capability.h> #include <linux/ipc_namespace.h> #include <linux/msg.h> +#include <linux/slab.h> #include "util.h" -static void *get_ipc(struct ctl_table *table) -{ - char *which = table->data; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; - return which; -} - -static int proc_ipc_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); -} - -static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); -} - static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + struct ipc_namespace *ns = + container_of(table->data, struct ipc_namespace, shm_rmid_forced); + int err; + + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err < 0) return err; @@ -58,17 +32,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, return err; } -static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_doulongvec_minmax(&ipc_table, write, buffer, - lenp, ppos); -} - static 
int proc_ipc_auto_msgmni(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -87,14 +50,15 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct ipc_namespace *ns = + container_of(table->data, struct ipc_namespace, sem_ctls); int ret, semmni; - struct ipc_namespace *ns = current->nsproxy->ipc_ns; semmni = ns->sem_ctls[3]; - ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret) - ret = sem_check_semmni(current->nsproxy->ipc_ns); + ret = sem_check_semmni(ns); /* * Reset the semmni value if an error happens. @@ -104,44 +68,31 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_CHECKPOINT_RESTORE -static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, - int write, void *buffer, size_t *lenp, loff_t *ppos) -{ - struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; - - if (write && !checkpoint_restore_ns_capable(user_ns)) - return -EPERM; - - return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -} -#endif - int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -static struct ctl_table ipc_kern_table[] = { +static struct ctl_table ipc_sysctls[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, .maxlen = sizeof(init_ipc_ns.shm_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, .maxlen = sizeof(init_ipc_ns.shm_ctlall), .mode = 0644, - .proc_handler = proc_ipc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, .maxlen = sizeof(init_ipc_ns.shm_ctlmni), .mode = 0644, - .proc_handler = 
proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, @@ -159,7 +110,7 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof(init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -168,7 +119,7 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof(init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, @@ -186,7 +137,7 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -202,8 +153,8 @@ static struct ctl_table ipc_kern_table[] = { .procname = "sem_next_id", .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .mode = 0444, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -211,8 +162,8 @@ static struct ctl_table ipc_kern_table[] = { .procname = "msg_next_id", .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .mode = 0444, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -220,8 +171,8 @@ static struct ctl_table ipc_kern_table[] = { .procname = "shm_next_id", .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode = 0666, - .proc_handler = 
proc_ipc_dointvec_minmax_checkpoint_restore, + .mode = 0444, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -229,18 +180,112 @@ static struct ctl_table ipc_kern_table[] = { {} }; -static struct ctl_table ipc_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = ipc_kern_table, - }, - {} +static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +{ + return &current->nsproxy->ipc_ns->ipc_set; +} + +static int set_is_seen(struct ctl_table_set *set) +{ + return &current->nsproxy->ipc_ns->ipc_set == set; +} + +static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) +{ + int mode = table->mode; + +#ifdef CONFIG_CHECKPOINT_RESTORE + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + + if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || + (table->data == &ns->ids[IPC_MSG_IDS].next_id) || + (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && + checkpoint_restore_ns_capable(ns->user_ns)) + mode = 0666; +#endif + return mode; +} + +static struct ctl_table_root set_root = { + .lookup = set_lookup, + .permissions = ipc_permissions, }; +bool setup_ipc_sysctls(struct ipc_namespace *ns) +{ + struct ctl_table *tbl; + + setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); + + tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); + if (tbl) { + int i; + + for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { + if (tbl[i].data == &init_ipc_ns.shm_ctlmax) + tbl[i].data = &ns->shm_ctlmax; + + else if (tbl[i].data == &init_ipc_ns.shm_ctlall) + tbl[i].data = &ns->shm_ctlall; + + else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) + tbl[i].data = &ns->shm_ctlmni; + + else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) + tbl[i].data = &ns->shm_rmid_forced; + + else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) + tbl[i].data = &ns->msg_ctlmax; + + else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) + tbl[i].data = &ns->msg_ctlmni; + + else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) + tbl[i].data 
= &ns->msg_ctlmnb; + + else if (tbl[i].data == &init_ipc_ns.sem_ctls) + tbl[i].data = &ns->sem_ctls; +#ifdef CONFIG_CHECKPOINT_RESTORE + else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) + tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; + + else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) + tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; + + else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) + tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; +#endif + else + tbl[i].data = NULL; + } + + ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); + } + if (!ns->ipc_sysctls) { + kfree(tbl); + retire_sysctl_set(&ns->ipc_set); + return false; + } + + return true; +} + +void retire_ipc_sysctls(struct ipc_namespace *ns) +{ + struct ctl_table *tbl; + + tbl = ns->ipc_sysctls->ctl_table_arg; + unregister_sysctl_table(ns->ipc_sysctls); + retire_sysctl_set(&ns->ipc_set); + kfree(tbl); +} + static int __init ipc_sysctl_init(void) { - register_sysctl_table(ipc_root_table); + if (!setup_ipc_sysctls(&init_ipc_ns)) { + pr_warn("ipc sysctl registration failed\n"); + return -ENOMEM; + } return 0; } |