diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-12 11:00:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-12 11:00:22 -0700 |
commit | 676cb4957396411fdb7aba906d5f950fc3de7cc9 (patch) | |
tree | bbc747384f842ace55d4a4bf6a98f27a8304bb20 /ipc | |
parent | 95b8b5953a315081eadbadf49200e57d7e05aae7 (diff) | |
parent | 6a961bffd1c3505c13b4d33bbb8385fe08239cb8 (diff) | |
download | lwn-676cb4957396411fdb7aba906d5f950fc3de7cc9.tar.gz lwn-676cb4957396411fdb7aba906d5f950fc3de7cc9.zip |
Merge tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
- hfs and hfsplus kmap API modernization (Fabio Francesco)
- make crash-kexec work properly when invoked from an NMI-time panic
(Valentin Schneider)
- ntfs bugfixes (Hawkins Jiawei)
- improve IPC msg scalability by replacing atomic_t's with percpu
counters (Jiebin Sun)
- nilfs2 cleanups (Minghao Chi)
- lots of other single patches all over the tree!
* tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (71 commits)
include/linux/entry-common.h: remove has_signal comment of arch_do_signal_or_restart() prototype
proc: test how it holds up with mapping'less process
mailmap: update Frank Rowand email address
ia64: mca: use strscpy() is more robust and safer
init/Kconfig: fix unmet direct dependencies
ia64: update config files
nilfs2: replace WARN_ONs by nilfs_error for checkpoint acquisition failure
fork: remove duplicate included header files
init/main.c: remove unnecessary (void*) conversions
proc: mark more files as permanent
nilfs2: remove the unneeded result variable
nilfs2: delete unnecessary checks before brelse()
checkpatch: warn for non-standard fixes tag style
usr/gen_init_cpio.c: remove unnecessary -1 values from int file
ipc/msg: mitigate the lock contention with percpu counter
percpu: add percpu_counter_add_local and percpu_counter_sub_local
fs/ocfs2: fix repeated words in comments
relay: use kvcalloc to alloc page array in relay_alloc_page_array
proc: make config PROC_CHILDREN depend on PROC_FS
fs: uninline inode_maybe_inc_iversion()
...
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/mqueue.c | 3 | ||||
-rw-r--r-- | ipc/msg.c | 48 | ||||
-rw-r--r-- | ipc/namespace.c | 5 | ||||
-rw-r--r-- | ipc/util.c | 53 | ||||
-rw-r--r-- | ipc/util.h | 4 |
5 files changed, 73 insertions, 40 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 9cf314b3f079..467a194b8a2e 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -986,8 +986,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) out_unlock: inode_unlock(d_inode(mnt->mnt_root)); - if (inode) - iput(inode); + iput(inode); mnt_drop_write(mnt); out_name: putname(name); diff --git a/ipc/msg.c b/ipc/msg.c index a0d05775af2c..e4e0990e08f7 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -39,6 +39,7 @@ #include <linux/nsproxy.h> #include <linux/ipc_namespace.h> #include <linux/rhashtable.h> +#include <linux/percpu_counter.h> #include <asm/current.h> #include <linux/uaccess.h> @@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) rcu_read_unlock(); list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { - atomic_dec(&ns->msg_hdrs); + percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1); free_msg(msg); } - atomic_sub(msq->q_cbytes, &ns->msg_bytes); + percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes); ipc_update_pid(&msq->q_lspid, NULL); ipc_update_pid(&msq->q_lrpid, NULL); ipc_rcu_putref(&msq->q_perm, msg_rcu_free); @@ -495,17 +496,22 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid, msginfo->msgssz = MSGSSZ; msginfo->msgseg = MSGSEG; down_read(&msg_ids(ns).rwsem); - if (cmd == MSG_INFO) { + if (cmd == MSG_INFO) msginfo->msgpool = msg_ids(ns).in_use; - msginfo->msgmap = atomic_read(&ns->msg_hdrs); - msginfo->msgtql = atomic_read(&ns->msg_bytes); + max_idx = ipc_get_maxidx(&msg_ids(ns)); + up_read(&msg_ids(ns).rwsem); + if (cmd == MSG_INFO) { + msginfo->msgmap = min_t(int, + percpu_counter_sum(&ns->percpu_msg_hdrs), + INT_MAX); + msginfo->msgtql = min_t(int, + percpu_counter_sum(&ns->percpu_msg_bytes), + INT_MAX); } else { msginfo->msgmap = MSGMAP; msginfo->msgpool = MSGPOOL; msginfo->msgtql = MSGTQL; } - max_idx = ipc_get_maxidx(&msg_ids(ns)); - up_read(&msg_ids(ns).rwsem); return (max_idx < 0) ? 0 : max_idx; } @@ -935,8 +941,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext, list_add_tail(&msg->m_list, &msq->q_messages); msq->q_cbytes += msgsz; msq->q_qnum++; - atomic_add(msgsz, &ns->msg_bytes); - atomic_inc(&ns->msg_hdrs); + percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz); + percpu_counter_add_local(&ns->percpu_msg_hdrs, 1); } err = 0; @@ -1159,8 +1165,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in msq->q_rtime = ktime_get_real_seconds(); ipc_update_pid(&msq->q_lrpid, task_tgid(current)); msq->q_cbytes -= msg->m_ts; - atomic_sub(msg->m_ts, &ns->msg_bytes); - atomic_dec(&ns->msg_hdrs); + percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts); + percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1); ss_wakeup(msq, &wake_q, false); goto out_unlock0; @@ -1297,20 +1303,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, } #endif -void msg_init_ns(struct ipc_namespace *ns) +int msg_init_ns(struct ipc_namespace *ns) { + int ret; + ns->msg_ctlmax = MSGMAX; ns->msg_ctlmnb = MSGMNB; ns->msg_ctlmni = MSGMNI; - atomic_set(&ns->msg_bytes, 0); - atomic_set(&ns->msg_hdrs, 0); + ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL); + if (ret) + goto fail_msg_bytes; + ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL); + if (ret) + goto fail_msg_hdrs; ipc_init_ids(&ns->ids[IPC_MSG_IDS]); + return 0; + +fail_msg_hdrs: + percpu_counter_destroy(&ns->percpu_msg_bytes); +fail_msg_bytes: + return ret; } #ifdef CONFIG_IPC_NS void msg_exit_ns(struct ipc_namespace *ns) { + percpu_counter_destroy(&ns->percpu_msg_bytes); + percpu_counter_destroy(&ns->percpu_msg_hdrs); free_ipcs(ns, &msg_ids(ns), freeque); idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht); diff --git a/ipc/namespace.c b/ipc/namespace.c index e1fcaedba4fa..8316ea585733 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (!setup_ipc_sysctls(ns)) goto fail_mq; + err = msg_init_ns(ns); + if (err) + goto fail_put; + sem_init_ns(ns); - msg_init_ns(ns); shm_init_ns(ns); return ns; diff --git a/ipc/util.c b/ipc/util.c index a2208d0f26b2..05cb9de66735 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -782,28 +782,37 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *s) return iter->pid_ns; } -/* - * This routine locks the ipc structure found at least at position pos. +/** + * sysvipc_find_ipc - Find and lock the ipc structure based on seq pos + * @ids: ipc identifier set + * @pos: expected position + * + * The function finds an ipc structure, based on the sequence file + * position @pos. If there is no ipc structure at position @pos, then + * the successor is selected. + * If a structure is found, then it is locked (both rcu_read_lock() and + * ipc_lock_object()) and @pos is set to the position needed to locate + * the found ipc structure. + * If nothing is found (i.e. EOF), @pos is not modified. + * + * The function returns the found ipc structure, or NULL at EOF. */ -static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, - loff_t *new_pos) +static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t *pos) { - struct kern_ipc_perm *ipc = NULL; - int max_idx = ipc_get_maxidx(ids); + int tmpidx; + struct kern_ipc_perm *ipc; - if (max_idx == -1 || pos > max_idx) - goto out; + /* convert from position to idr index -> "-1" */ + tmpidx = *pos - 1; - for (; pos <= max_idx; pos++) { - ipc = idr_find(&ids->ipcs_idr, pos); - if (ipc != NULL) { - rcu_read_lock(); - ipc_lock_object(ipc); - break; - } + ipc = idr_get_next(&ids->ipcs_idr, &tmpidx); + if (ipc != NULL) { + rcu_read_lock(); + ipc_lock_object(ipc); + + /* convert from idr index to position -> "+1" */ + *pos = tmpidx + 1; } -out: - *new_pos = pos + 1; return ipc; } @@ -817,11 +826,13 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) if (ipc && ipc != SEQ_START_TOKEN) ipc_unlock(ipc); - return sysvipc_find_ipc(&iter->ns->ids[iface->ids], *pos, pos); + /* Next -> search for *pos+1 */ + (*pos)++; + return sysvipc_find_ipc(&iter->ns->ids[iface->ids], pos); } /* - * File positions: pos 0 -> header, pos n -> ipc id = n - 1. + * File positions: pos 0 -> header, pos n -> ipc idx = n - 1. * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START. */ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) @@ -846,8 +857,8 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - /* Find the (pos-1)th ipc */ - return sysvipc_find_ipc(ids, *pos - 1, pos); + /* Otherwise return the correct ipc structure */ + return sysvipc_find_ipc(ids, pos); } static void sysvipc_proc_stop(struct seq_file *s, void *it) diff --git a/ipc/util.h b/ipc/util.h index 2dd7ce0416d8..b2906e366539 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { } #ifdef CONFIG_SYSVIPC void sem_init_ns(struct ipc_namespace *ns); -void msg_init_ns(struct ipc_namespace *ns); +int msg_init_ns(struct ipc_namespace *ns); void shm_init_ns(struct ipc_namespace *ns); void sem_exit_ns(struct ipc_namespace *ns); @@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); #else static inline void sem_init_ns(struct ipc_namespace *ns) { } -static inline void msg_init_ns(struct ipc_namespace *ns) { } +static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; } static inline void shm_init_ns(struct ipc_namespace *ns) { } static inline void sem_exit_ns(struct ipc_namespace *ns) { } |