summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-12 11:00:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-12 11:00:22 -0700
commit676cb4957396411fdb7aba906d5f950fc3de7cc9 (patch)
treebbc747384f842ace55d4a4bf6a98f27a8304bb20 /ipc
parent95b8b5953a315081eadbadf49200e57d7e05aae7 (diff)
parent6a961bffd1c3505c13b4d33bbb8385fe08239cb8 (diff)
downloadlwn-676cb4957396411fdb7aba906d5f950fc3de7cc9.tar.gz
lwn-676cb4957396411fdb7aba906d5f950fc3de7cc9.zip
Merge tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton: - hfs and hfsplus kmap API modernization (Fabio Francesco) - make crash-kexec work properly when invoked from an NMI-time panic (Valentin Schneider) - ntfs bugfixes (Hawkins Jiawei) - improve IPC msg scalability by replacing atomic_t's with percpu counters (Jiebin Sun) - nilfs2 cleanups (Minghao Chi) - lots of other single patches all over the tree! * tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (71 commits) include/linux/entry-common.h: remove has_signal comment of arch_do_signal_or_restart() prototype proc: test how it holds up with mapping'less process mailmap: update Frank Rowand email address ia64: mca: use strscpy() is more robust and safer init/Kconfig: fix unmet direct dependencies ia64: update config files nilfs2: replace WARN_ONs by nilfs_error for checkpoint acquisition failure fork: remove duplicate included header files init/main.c: remove unnecessary (void*) conversions proc: mark more files as permanent nilfs2: remove the unneeded result variable nilfs2: delete unnecessary checks before brelse() checkpatch: warn for non-standard fixes tag style usr/gen_init_cpio.c: remove unnecessary -1 values from int file ipc/msg: mitigate the lock contention with percpu counter percpu: add percpu_counter_add_local and percpu_counter_sub_local fs/ocfs2: fix repeated words in comments relay: use kvcalloc to alloc page array in relay_alloc_page_array proc: make config PROC_CHILDREN depend on PROC_FS fs: uninline inode_maybe_inc_iversion() ...
Diffstat (limited to 'ipc')
-rw-r--r--ipc/mqueue.c3
-rw-r--r--ipc/msg.c48
-rw-r--r--ipc/namespace.c5
-rw-r--r--ipc/util.c53
-rw-r--r--ipc/util.h4
5 files changed, 73 insertions, 40 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 9cf314b3f079..467a194b8a2e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -986,8 +986,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
out_unlock:
inode_unlock(d_inode(mnt->mnt_root));
- if (inode)
- iput(inode);
+ iput(inode);
mnt_drop_write(mnt);
out_name:
putname(name);
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..e4e0990e08f7 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
#include <asm/current.h>
#include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();
list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,22 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = min_t(int,
+ percpu_counter_sum(&ns->percpu_msg_hdrs),
+ INT_MAX);
+ msginfo->msgtql = min_t(int,
+ percpu_counter_sum(&ns->percpu_msg_bytes),
+ INT_MAX);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}
@@ -935,8 +941,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}
err = 0;
@@ -1159,8 +1165,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
ss_wakeup(msq, &wake_q, false);
goto out_unlock0;
@@ -1297,20 +1303,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;
- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+fail_msg_bytes:
+ return ret;
}
#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_mq;
+ err = msg_init_ns(ns);
+ if (err)
+ goto fail_put;
+
sem_init_ns(ns);
- msg_init_ns(ns);
shm_init_ns(ns);
return ns;
diff --git a/ipc/util.c b/ipc/util.c
index a2208d0f26b2..05cb9de66735 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -782,28 +782,37 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *s)
return iter->pid_ns;
}
-/*
- * This routine locks the ipc structure found at least at position pos.
+/**
+ * sysvipc_find_ipc - Find and lock the ipc structure based on seq pos
+ * @ids: ipc identifier set
+ * @pos: expected position
+ *
+ * The function finds an ipc structure, based on the sequence file
+ * position @pos. If there is no ipc structure at position @pos, then
+ * the successor is selected.
+ * If a structure is found, then it is locked (both rcu_read_lock() and
+ * ipc_lock_object()) and @pos is set to the position needed to locate
+ * the found ipc structure.
+ * If nothing is found (i.e. EOF), @pos is not modified.
+ *
+ * The function returns the found ipc structure, or NULL at EOF.
*/
-static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
- loff_t *new_pos)
+static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t *pos)
{
- struct kern_ipc_perm *ipc = NULL;
- int max_idx = ipc_get_maxidx(ids);
+ int tmpidx;
+ struct kern_ipc_perm *ipc;
- if (max_idx == -1 || pos > max_idx)
- goto out;
+ /* convert from position to idr index -> "-1" */
+ tmpidx = *pos - 1;
- for (; pos <= max_idx; pos++) {
- ipc = idr_find(&ids->ipcs_idr, pos);
- if (ipc != NULL) {
- rcu_read_lock();
- ipc_lock_object(ipc);
- break;
- }
+ ipc = idr_get_next(&ids->ipcs_idr, &tmpidx);
+ if (ipc != NULL) {
+ rcu_read_lock();
+ ipc_lock_object(ipc);
+
+ /* convert from idr index to position -> "+1" */
+ *pos = tmpidx + 1;
}
-out:
- *new_pos = pos + 1;
return ipc;
}
@@ -817,11 +826,13 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
if (ipc && ipc != SEQ_START_TOKEN)
ipc_unlock(ipc);
- return sysvipc_find_ipc(&iter->ns->ids[iface->ids], *pos, pos);
+ /* Next -> search for *pos+1 */
+ (*pos)++;
+ return sysvipc_find_ipc(&iter->ns->ids[iface->ids], pos);
}
/*
- * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
+ * File positions: pos 0 -> header, pos n -> ipc idx = n - 1.
* SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START.
*/
static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
@@ -846,8 +857,8 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
if (*pos == 0)
return SEQ_START_TOKEN;
- /* Find the (pos-1)th ipc */
- return sysvipc_find_ipc(ids, *pos - 1, pos);
+ /* Otherwise return the correct ipc structure */
+ return sysvipc_find_ipc(ids, pos);
}
static void sysvipc_proc_stop(struct seq_file *s, void *it)
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..b2906e366539 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; }
static inline void shm_init_ns(struct ipc_namespace *ns) { }
static inline void sem_exit_ns(struct ipc_namespace *ns) { }