summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-05-01 08:17:51 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-05-01 08:17:51 -0700
commit: 823e75f723aa3fefd5d2eecbf8636184ca4790fc (patch)
tree: 3a481d7077a27ba86006f431697b5cac2e59ab58
parent: 149b306089b88e186942a8d6647028ae6683aaf9 (diff)
parent: d69f3bad4675ac519d41ca2b11e1c00ca115cecd (diff)
download: lwn-823e75f723aa3fefd5d2eecbf8636184ca4790fc.tar.gz
download: lwn-823e75f723aa3fefd5d2eecbf8636184ca4790fc.zip
Merge branch 'ipc-scalability'

Merge IPC cleanup and scalability patches from Andrew Morton.

This cleans up many of the oddities in the IPC code, uses the list
iterator helpers, splits out locking and adds per-semaphore locks for
greater scalability of the IPC semaphore code.

Most normal user-level locking by now uses futexes (ie pthreads, but
also a lot of specialized locks), but SysV IPC semaphores are apparently
still used in some big applications, either for portability reasons, or
because they offer tracking and undo (and you don't need to have a
special shared memory area for them).

Our IPC semaphore scalability was pitiful. We used to lock much too big
ranges, and we used to have a single ipc lock per ipc semaphore array.
Most loads never cared, but some do. There are some numbers in the
individual commits.

* ipc-scalability:
  ipc: sysv shared memory limited to 8TiB
  ipc/msg.c: use list_for_each_entry_[safe] for list traversing
  ipc,sem: fine grained locking for semtimedop
  ipc,sem: have only one list in struct sem_queue
  ipc,sem: open code and rename sem_lock
  ipc,sem: do not hold ipc lock more than necessary
  ipc: introduce lockless pre_down ipcctl
  ipc: introduce obtaining a lockless ipc object
  ipc: remove bogus lock comment for ipc_checkid
  ipc/msgutil.c: use linux/uaccess.h
  ipc: refactor msg list search into separate function
  ipc: simplify msg list search
  ipc: implement MSG_COPY as a new receive mode
  ipc: remove msg handling from queue scan
  ipc: set EFAULT as default error in load_msg()
  ipc: tighten msg copy loops
  ipc: separate msg allocation from userspace copy
  ipc: clamp with min()

-rw-r--r--  include/linux/ipc_namespace.h |   2
-rw-r--r--  ipc/msg.c                     | 124
-rw-r--r--  ipc/msgutil.c                 | 110
-rw-r--r--  ipc/sem.c                     | 474
-rw-r--r--  ipc/shm.c                     |   2
-rw-r--r--  ipc/util.c                    | 150
-rw-r--r--  ipc/util.h                    |  19
7 files changed, 540 insertions, 341 deletions
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index ae221a7b5092..c4d870b0d5e6 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -43,8 +43,8 @@ struct ipc_namespace {
size_t shm_ctlmax;
size_t shm_ctlall;
+ unsigned long shm_tot;
int shm_ctlmni;
- int shm_tot;
/*
* Defines whether IPC_RMID is forced for _all_ shm segments regardless
* of shmctl()
diff --git a/ipc/msg.c b/ipc/msg.c
index fede1d06ef30..d0c6d967b390 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -66,6 +66,7 @@ struct msg_sender {
#define SEARCH_EQUAL 2
#define SEARCH_NOTEQUAL 3
#define SEARCH_LESSEQUAL 4
+#define SEARCH_NUMBER 5
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
@@ -237,14 +238,9 @@ static inline void ss_del(struct msg_sender *mss)
static void ss_wakeup(struct list_head *h, int kill)
{
- struct list_head *tmp;
+ struct msg_sender *mss, *t;
- tmp = h->next;
- while (tmp != h) {
- struct msg_sender *mss;
-
- mss = list_entry(tmp, struct msg_sender, list);
- tmp = tmp->next;
+ list_for_each_entry_safe(mss, t, h, list) {
if (kill)
mss->list.next = NULL;
wake_up_process(mss->tsk);
@@ -253,14 +249,9 @@ static void ss_wakeup(struct list_head *h, int kill)
static void expunge_all(struct msg_queue *msq, int res)
{
- struct list_head *tmp;
-
- tmp = msq->q_receivers.next;
- while (tmp != &msq->q_receivers) {
- struct msg_receiver *msr;
+ struct msg_receiver *msr, *t;
- msr = list_entry(tmp, struct msg_receiver, r_list);
- tmp = tmp->next;
+ list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
smp_mb();
@@ -278,7 +269,7 @@ static void expunge_all(struct msg_queue *msq, int res)
*/
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
- struct list_head *tmp;
+ struct msg_msg *msg, *t;
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
expunge_all(msq, -EIDRM);
@@ -286,11 +277,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
msg_rmid(ns, msq);
msg_unlock(msq);
- tmp = msq->q_messages.next;
- while (tmp != &msq->q_messages) {
- struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
-
- tmp = tmp->next;
+ list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
atomic_dec(&ns->msg_hdrs);
free_msg(msg);
}
@@ -583,6 +570,7 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
switch(mode)
{
case SEARCH_ANY:
+ case SEARCH_NUMBER:
return 1;
case SEARCH_LESSEQUAL:
if (msg->m_type <=type)
@@ -602,14 +590,9 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
- struct list_head *tmp;
+ struct msg_receiver *msr, *t;
- tmp = msq->q_receivers.next;
- while (tmp != &msq->q_receivers) {
- struct msg_receiver *msr;
-
- msr = list_entry(tmp, struct msg_receiver, r_list);
- tmp = tmp->next;
+ list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
!security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
msr->r_msgtype, msr->r_mode)) {
@@ -685,7 +668,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
goto out_unlock_free;
}
ss_add(msq, &s);
- ipc_rcu_getref(msq);
+
+ if (!ipc_rcu_getref(msq)) {
+ err = -EIDRM;
+ goto out_unlock_free;
+ }
+
msg_unlock(msq);
schedule();
@@ -738,6 +726,8 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
static inline int convert_mode(long *msgtyp, int msgflg)
{
+ if (msgflg & MSG_COPY)
+ return SEARCH_NUMBER;
/*
* find message of correct type.
* msgtyp = 0 => get first.
@@ -774,14 +764,10 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
* This function creates new kernel message structure, large enough to store
* bufsz message bytes.
*/
-static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
- int msgflg, long *msgtyp,
- unsigned long *copy_number)
+static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
struct msg_msg *copy;
- *copy_number = *msgtyp;
- *msgtyp = 0;
/*
* Create dummy message to copy real message to.
*/
@@ -797,9 +783,7 @@ static inline void free_copy(struct msg_msg *copy)
free_msg(copy);
}
#else
-static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
- int msgflg, long *msgtyp,
- unsigned long *copy_number)
+static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
return ERR_PTR(-ENOSYS);
}
@@ -809,6 +793,30 @@ static inline void free_copy(struct msg_msg *copy)
}
#endif
+static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
+{
+ struct msg_msg *msg;
+ long count = 0;
+
+ list_for_each_entry(msg, &msq->q_messages, m_list) {
+ if (testmsg(msg, *msgtyp, mode) &&
+ !security_msg_queue_msgrcv(msq, msg, current,
+ *msgtyp, mode)) {
+ if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
+ *msgtyp = msg->m_type - 1;
+ } else if (mode == SEARCH_NUMBER) {
+ if (*msgtyp == count)
+ return msg;
+ } else
+ return msg;
+ count++;
+ }
+ }
+
+ return ERR_PTR(-EAGAIN);
+}
+
+
long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int msgflg,
long (*msg_handler)(void __user *, struct msg_msg *, size_t))
@@ -818,15 +826,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int mode;
struct ipc_namespace *ns;
struct msg_msg *copy = NULL;
- unsigned long copy_number = 0;
ns = current->nsproxy->ipc_ns;
if (msqid < 0 || (long) bufsz < 0)
return -EINVAL;
if (msgflg & MSG_COPY) {
- copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax),
- msgflg, &msgtyp, &copy_number);
+ copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
if (IS_ERR(copy))
return PTR_ERR(copy);
}
@@ -840,45 +846,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
for (;;) {
struct msg_receiver msr_d;
- struct list_head *tmp;
- long msg_counter = 0;
msg = ERR_PTR(-EACCES);
if (ipcperms(ns, &msq->q_perm, S_IRUGO))
goto out_unlock;
- msg = ERR_PTR(-EAGAIN);
- tmp = msq->q_messages.next;
- while (tmp != &msq->q_messages) {
- struct msg_msg *walk_msg;
-
- walk_msg = list_entry(tmp, struct msg_msg, m_list);
- if (testmsg(walk_msg, msgtyp, mode) &&
- !security_msg_queue_msgrcv(msq, walk_msg, current,
- msgtyp, mode)) {
-
- msg = walk_msg;
- if (mode == SEARCH_LESSEQUAL &&
- walk_msg->m_type != 1) {
- msgtyp = walk_msg->m_type - 1;
- } else if (msgflg & MSG_COPY) {
- if (copy_number == msg_counter) {
- /*
- * Found requested message.
- * Copy it.
- */
- msg = copy_msg(msg, copy);
- if (IS_ERR(msg))
- goto out_unlock;
- break;
- }
- msg = ERR_PTR(-EAGAIN);
- } else
- break;
- msg_counter++;
- }
- tmp = tmp->next;
- }
+ msg = find_msg(msq, &msgtyp, mode);
+
if (!IS_ERR(msg)) {
/*
* Found a suitable message.
@@ -892,8 +866,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
* If we are copying, then do not unlink message and do
* not update queue parameters.
*/
- if (msgflg & MSG_COPY)
+ if (msgflg & MSG_COPY) {
+ msg = copy_msg(msg, copy);
goto out_unlock;
+ }
list_del(&msg->m_list);
msq->q_qnum--;
msq->q_rtime = get_seconds();
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 5df8e4bf1db0..d43439e6eb47 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -17,7 +17,7 @@
#include <linux/ipc_namespace.h>
#include <linux/utsname.h>
#include <linux/proc_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
@@ -37,59 +37,70 @@ struct ipc_namespace init_ipc_ns = {
atomic_t nr_ipc_ns = ATOMIC_INIT(1);
struct msg_msgseg {
- struct msg_msgseg* next;
+ struct msg_msgseg *next;
/* the next part of the message follows immediately */
};
-#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+#define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg))
-struct msg_msg *load_msg(const void __user *src, int len)
+
+static struct msg_msg *alloc_msg(int len)
{
struct msg_msg *msg;
struct msg_msgseg **pseg;
- int err;
int alen;
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
+ alen = min(len, DATALEN_MSG);
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
- return ERR_PTR(-ENOMEM);
+ return NULL;
msg->next = NULL;
msg->security = NULL;
- if (copy_from_user(msg + 1, src, alen)) {
- err = -EFAULT;
- goto out_err;
- }
-
len -= alen;
- src = ((char __user *)src) + alen;
pseg = &msg->next;
while (len > 0) {
struct msg_msgseg *seg;
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
- seg = kmalloc(sizeof(*seg) + alen,
- GFP_KERNEL);
- if (seg == NULL) {
- err = -ENOMEM;
+ alen = min(len, DATALEN_SEG);
+ seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL);
+ if (seg == NULL)
goto out_err;
- }
*pseg = seg;
seg->next = NULL;
- if (copy_from_user(seg + 1, src, alen)) {
- err = -EFAULT;
- goto out_err;
- }
pseg = &seg->next;
len -= alen;
- src = ((char __user *)src) + alen;
+ }
+
+ return msg;
+
+out_err:
+ free_msg(msg);
+ return NULL;
+}
+
+struct msg_msg *load_msg(const void __user *src, int len)
+{
+ struct msg_msg *msg;
+ struct msg_msgseg *seg;
+ int err = -EFAULT;
+ int alen;
+
+ msg = alloc_msg(len);
+ if (msg == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ alen = min(len, DATALEN_MSG);
+ if (copy_from_user(msg + 1, src, alen))
+ goto out_err;
+
+ for (seg = msg->next; seg != NULL; seg = seg->next) {
+ len -= alen;
+ src = (char __user *)src + alen;
+ alen = min(len, DATALEN_SEG);
+ if (copy_from_user(seg + 1, src, alen))
+ goto out_err;
}
err = security_msg_msg_alloc(msg);
@@ -113,23 +124,16 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
if (src->m_ts > dst->m_ts)
return ERR_PTR(-EINVAL);
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
+ alen = min(len, DATALEN_MSG);
memcpy(dst + 1, src + 1, alen);
- len -= alen;
- dst_pseg = dst->next;
- src_pseg = src->next;
- while (len > 0) {
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
- memcpy(dst_pseg + 1, src_pseg + 1, alen);
- dst_pseg = dst_pseg->next;
+ for (dst_pseg = dst->next, src_pseg = src->next;
+ src_pseg != NULL;
+ dst_pseg = dst_pseg->next, src_pseg = src_pseg->next) {
+
len -= alen;
- src_pseg = src_pseg->next;
+ alen = min(len, DATALEN_SEG);
+ memcpy(dst_pseg + 1, src_pseg + 1, alen);
}
dst->m_type = src->m_type;
@@ -148,24 +152,16 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len)
int alen;
struct msg_msgseg *seg;
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
+ alen = min(len, DATALEN_MSG);
if (copy_to_user(dest, msg + 1, alen))
return -1;
- len -= alen;
- dest = ((char __user *)dest) + alen;
- seg = msg->next;
- while (len > 0) {
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
+ for (seg = msg->next; seg != NULL; seg = seg->next) {
+ len -= alen;
+ dest = (char __user *)dest + alen;
+ alen = min(len, DATALEN_SEG);
if (copy_to_user(dest, seg + 1, alen))
return -1;
- len -= alen;
- dest = ((char __user *)dest) + alen;
- seg = seg->next;
}
return 0;
}
diff --git a/ipc/sem.c b/ipc/sem.c
index 5b167d00efa6..e78ee3186d1f 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -94,12 +94,12 @@
struct sem {
int semval; /* current value */
int sempid; /* pid of last operation */
+ spinlock_t lock; /* spinlock for fine-grained semtimedop */
struct list_head sem_pending; /* pending single-sop operations */
};
/* One queue for each sleeping process in the system. */
struct sem_queue {
- struct list_head simple_list; /* queue of pending operations */
struct list_head list; /* queue of pending operations */
struct task_struct *sleeper; /* this process */
struct sem_undo *undo; /* undo structure */
@@ -138,7 +138,6 @@ struct sem_undo_list {
#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
-#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid)
static int newary(struct ipc_namespace *, struct ipc_params *);
@@ -191,47 +190,164 @@ void __init sem_init (void)
}
/*
+ * If the request contains only one semaphore operation, and there are
+ * no complex transactions pending, lock only the semaphore involved.
+ * Otherwise, lock the entire semaphore array, since we either have
+ * multiple semaphores in our own semops, or we need to look at
+ * semaphores from other pending complex operations.
+ *
+ * Carefully guard against sma->complex_count changing between zero
+ * and non-zero while we are spinning for the lock. The value of
+ * sma->complex_count cannot change while we are holding the lock,
+ * so sem_unlock should be fine.
+ *
+ * The global lock path checks that all the local locks have been released,
+ * checking each local lock once. This means that the local lock paths
+ * cannot start their critical sections while the global lock is held.
+ */
+static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+ int nsops)
+{
+ int locknum;
+ again:
+ if (nsops == 1 && !sma->complex_count) {
+ struct sem *sem = sma->sem_base + sops->sem_num;
+
+ /* Lock just the semaphore we are interested in. */
+ spin_lock(&sem->lock);
+
+ /*
+ * If sma->complex_count was set while we were spinning,
+ * we may need to look at things we did not lock here.
+ */
+ if (unlikely(sma->complex_count)) {
+ spin_unlock(&sem->lock);
+ goto lock_array;
+ }
+
+ /*
+ * Another process is holding the global lock on the
+ * sem_array; we cannot enter our critical section,
+ * but have to wait for the global lock to be released.
+ */
+ if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+ spin_unlock(&sem->lock);
+ spin_unlock_wait(&sma->sem_perm.lock);
+ goto again;
+ }
+
+ locknum = sops->sem_num;
+ } else {
+ int i;
+ /*
+ * Lock the semaphore array, and wait for all of the
+ * individual semaphore locks to go away. The code
+ * above ensures no new single-lock holders will enter
+ * their critical section while the array lock is held.
+ */
+ lock_array:
+ spin_lock(&sma->sem_perm.lock);
+ for (i = 0; i < sma->sem_nsems; i++) {
+ struct sem *sem = sma->sem_base + i;
+ spin_unlock_wait(&sem->lock);
+ }
+ locknum = -1;
+ }
+ return locknum;
+}
+
+static inline void sem_unlock(struct sem_array *sma, int locknum)
+{
+ if (locknum == -1) {
+ spin_unlock(&sma->sem_perm.lock);
+ } else {
+ struct sem *sem = sma->sem_base + locknum;
+ spin_unlock(&sem->lock);
+ }
+ rcu_read_unlock();
+}
+
+/*
* sem_lock_(check_) routines are called in the paths where the rw_mutex
* is not held.
*/
-static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
+ int id, struct sembuf *sops, int nsops, int *locknum)
+{
+ struct kern_ipc_perm *ipcp;
+ struct sem_array *sma;
+
+ rcu_read_lock();
+ ipcp = ipc_obtain_object(&sem_ids(ns), id);
+ if (IS_ERR(ipcp)) {
+ sma = ERR_CAST(ipcp);
+ goto err;
+ }
+
+ sma = container_of(ipcp, struct sem_array, sem_perm);
+ *locknum = sem_lock(sma, sops, nsops);
+
+ /* ipc_rmid() may have already freed the ID while sem_lock
+ * was spinning: verify that the structure is still valid
+ */
+ if (!ipcp->deleted)
+ return container_of(ipcp, struct sem_array, sem_perm);
+
+ sem_unlock(sma, *locknum);
+ sma = ERR_PTR(-EINVAL);
+err:
+ rcu_read_unlock();
+ return sma;
+}
+
+static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);
if (IS_ERR(ipcp))
- return (struct sem_array *)ipcp;
+ return ERR_CAST(ipcp);
return container_of(ipcp, struct sem_array, sem_perm);
}
-static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
- int id)
+static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
+ int id)
{
- struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
if (IS_ERR(ipcp))
- return (struct sem_array *)ipcp;
+ return ERR_CAST(ipcp);
return container_of(ipcp, struct sem_array, sem_perm);
}
static inline void sem_lock_and_putref(struct sem_array *sma)
{
- ipc_lock_by_ptr(&sma->sem_perm);
+ rcu_read_lock();
+ sem_lock(sma, NULL, -1);
ipc_rcu_putref(sma);
}
static inline void sem_getref_and_unlock(struct sem_array *sma)
{
- ipc_rcu_getref(sma);
- ipc_unlock(&(sma)->sem_perm);
+ WARN_ON_ONCE(!ipc_rcu_getref(sma));
+ sem_unlock(sma, -1);
}
static inline void sem_putref(struct sem_array *sma)
{
- ipc_lock_by_ptr(&sma->sem_perm);
- ipc_rcu_putref(sma);
- ipc_unlock(&(sma)->sem_perm);
+ sem_lock_and_putref(sma);
+ sem_unlock(sma, -1);
+}
+
+/*
+ * Call inside the rcu read section.
+ */
+static inline void sem_getref(struct sem_array *sma)
+{
+ sem_lock(sma, NULL, -1);
+ WARN_ON_ONCE(!ipc_rcu_getref(sma));
+ sem_unlock(sma, -1);
}
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -324,15 +440,17 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
sma->sem_base = (struct sem *) &sma[1];
- for (i = 0; i < nsems; i++)
+ for (i = 0; i < nsems; i++) {
INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+ spin_lock_init(&sma->sem_base[i].lock);
+ }
sma->complex_count = 0;
INIT_LIST_HEAD(&sma->sem_pending);
INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();
- sem_unlock(sma);
+ sem_unlock(sma, -1);
return sma->sem_perm.id;
}
@@ -471,7 +589,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
q->status = IN_WAKEUP;
q->pid = error;
- list_add_tail(&q->simple_list, pt);
+ list_add_tail(&q->list, pt);
}
/**
@@ -489,7 +607,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
int did_something;
did_something = !list_empty(pt);
- list_for_each_entry_safe(q, t, pt, simple_list) {
+ list_for_each_entry_safe(q, t, pt, list) {
wake_up_process(q->sleeper);
/* q can disappear immediately after writing q->status. */
smp_wmb();
@@ -502,9 +620,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
list_del(&q->list);
- if (q->nsops == 1)
- list_del(&q->simple_list);
- else
+ if (q->nsops > 1)
sma->complex_count--;
}
@@ -557,9 +673,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
}
/*
* semval is 0. Check if there are wait-for-zero semops.
- * They must be the first entries in the per-semaphore simple queue
+ * They must be the first entries in the per-semaphore queue
*/
- h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
+ h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
BUG_ON(h->nsops != 1);
BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
@@ -579,8 +695,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
* @pt: list head for the tasks that must be woken up.
*
* update_queue must be called after a semaphore in a semaphore array
- * was modified. If multiple semaphore were modified, then @semnum
- * must be set to -1.
+ * was modified. If multiple semaphores were modified, update_queue must
+ * be called with semnum = -1, as well as with the number of each modified
+ * semaphore.
* The tasks that must be woken up are added to @pt. The return code
* is stored in q->pid.
* The function return 1 if at least one semop was completed successfully.
@@ -590,30 +707,19 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
struct sem_queue *q;
struct list_head *walk;
struct list_head *pending_list;
- int offset;
int semop_completed = 0;
- /* if there are complex operations around, then knowing the semaphore
- * that was modified doesn't help us. Assume that multiple semaphores
- * were modified.
- */
- if (sma->complex_count)
- semnum = -1;
-
- if (semnum == -1) {
+ if (semnum == -1)
pending_list = &sma->sem_pending;
- offset = offsetof(struct sem_queue, list);
- } else {
+ else
pending_list = &sma->sem_base[semnum].sem_pending;
- offset = offsetof(struct sem_queue, simple_list);
- }
again:
walk = pending_list->next;
while (walk != pending_list) {
int error, restart;
- q = (struct sem_queue *)((char *)walk - offset);
+ q = container_of(walk, struct sem_queue, list);
walk = walk->next;
/* If we are scanning the single sop, per-semaphore list of
@@ -672,9 +778,18 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
if (sma->complex_count || sops == NULL) {
if (update_queue(sma, -1, pt))
otime = 1;
+ }
+
+ if (!sops) {
+ /* No semops; something special is going on. */
+ for (i = 0; i < sma->sem_nsems; i++) {
+ if (update_queue(sma, i, pt))
+ otime = 1;
+ }
goto done;
}
+ /* Check the semaphores that were modified. */
for (i = 0; i < nsops; i++) {
if (sops[i].sem_op > 0 ||
(sops[i].sem_op < 0 &&
@@ -745,6 +860,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
struct sem_queue *q, *tq;
struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
struct list_head tasks;
+ int i;
/* Free the existing undo structures for this semaphore set. */
assert_spin_locked(&sma->sem_perm.lock);
@@ -763,10 +879,17 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
unlink_queue(sma, q);
wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
}
+ for (i = 0; i < sma->sem_nsems; i++) {
+ struct sem *sem = sma->sem_base + i;
+ list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
+ unlink_queue(sma, q);
+ wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+ }
+ }
/* Remove the semaphore set from the IDR */
sem_rmid(ns, sma);
- sem_unlock(sma);
+ sem_unlock(sma, -1);
wake_up_sem_queue_do(&tasks);
ns->used_sems -= sma->sem_nsems;
@@ -842,18 +965,25 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
case SEM_STAT:
{
struct semid64_ds tbuf;
- int id;
+ int id = 0;
+
+ memset(&tbuf, 0, sizeof(tbuf));
if (cmd == SEM_STAT) {
- sma = sem_lock(ns, semid);
- if (IS_ERR(sma))
- return PTR_ERR(sma);
+ rcu_read_lock();
+ sma = sem_obtain_object(ns, semid);
+ if (IS_ERR(sma)) {
+ err = PTR_ERR(sma);
+ goto out_unlock;
+ }
id = sma->sem_perm.id;
} else {
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
- return PTR_ERR(sma);
- id = 0;
+ rcu_read_lock();
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ err = PTR_ERR(sma);
+ goto out_unlock;
+ }
}
err = -EACCES;
@@ -864,13 +994,11 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
if (err)
goto out_unlock;
- memset(&tbuf, 0, sizeof(tbuf));
-
kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
tbuf.sem_otime = sma->sem_otime;
tbuf.sem_ctime = sma->sem_ctime;
tbuf.sem_nsems = sma->sem_nsems;
- sem_unlock(sma);
+ rcu_read_unlock();
if (copy_semid_to_user(p, &tbuf, version))
return -EFAULT;
return id;
@@ -879,7 +1007,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
return -EINVAL;
}
out_unlock:
- sem_unlock(sma);
+ rcu_read_unlock();
return err;
}
@@ -890,7 +1018,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
struct sem_array *sma;
struct sem* curr;
int err;
- int nsems;
struct list_head tasks;
int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
@@ -901,31 +1028,39 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
val = arg;
#endif
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
- return PTR_ERR(sma);
+ if (val > SEMVMX || val < 0)
+ return -ERANGE;
INIT_LIST_HEAD(&tasks);
- nsems = sma->sem_nsems;
- err = -EACCES;
- if (ipcperms(ns, &sma->sem_perm, S_IWUGO))
- goto out_unlock;
+ rcu_read_lock();
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
+ return PTR_ERR(sma);
+ }
+
+ if (semnum < 0 || semnum >= sma->sem_nsems) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+
+ if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
err = security_sem_semctl(sma, SETVAL);
- if (err)
- goto out_unlock;
+ if (err) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
- err = -EINVAL;
- if(semnum < 0 || semnum >= nsems)
- goto out_unlock;
+ sem_lock(sma, NULL, -1);
curr = &sma->sem_base[semnum];
- err = -ERANGE;
- if (val > SEMVMX || val < 0)
- goto out_unlock;
-
assert_spin_locked(&sma->sem_perm.lock);
list_for_each_entry(un, &sma->list_id, list_id)
un->semadj[semnum] = 0;
@@ -935,11 +1070,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
do_smart_update(sma, NULL, 0, 0, &tasks);
- err = 0;
-out_unlock:
- sem_unlock(sma);
+ sem_unlock(sma, -1);
wake_up_sem_queue_do(&tasks);
- return err;
+ return 0;
}
static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
@@ -947,27 +1080,34 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
{
struct sem_array *sma;
struct sem* curr;
- int err;
+ int err, nsems;
ushort fast_sem_io[SEMMSL_FAST];
ushort* sem_io = fast_sem_io;
- int nsems;
struct list_head tasks;
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
+ INIT_LIST_HEAD(&tasks);
+
+ rcu_read_lock();
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
return PTR_ERR(sma);
+ }
- INIT_LIST_HEAD(&tasks);
nsems = sma->sem_nsems;
err = -EACCES;
if (ipcperms(ns, &sma->sem_perm,
- cmd == SETALL ? S_IWUGO : S_IRUGO))
- goto out_unlock;
+ cmd == SETALL ? S_IWUGO : S_IRUGO)) {
+ rcu_read_unlock();
+ goto out_wakeup;
+ }
err = security_sem_semctl(sma, cmd);
- if (err)
- goto out_unlock;
+ if (err) {
+ rcu_read_unlock();
+ goto out_wakeup;
+ }
err = -EACCES;
switch (cmd) {
@@ -977,7 +1117,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
int i;
if(nsems > SEMMSL_FAST) {
- sem_getref_and_unlock(sma);
+ sem_getref(sma);
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if(sem_io == NULL) {
@@ -987,15 +1127,16 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma);
+ sem_unlock(sma, -1);
err = -EIDRM;
goto out_free;
}
- }
+ } else
+ sem_lock(sma, NULL, -1);
for (i = 0; i < sma->sem_nsems; i++)
sem_io[i] = sma->sem_base[i].semval;
- sem_unlock(sma);
+ sem_unlock(sma, -1);
err = 0;
if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
err = -EFAULT;
@@ -1006,7 +1147,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
int i;
struct sem_undo *un;
- sem_getref_and_unlock(sma);
+ if (!ipc_rcu_getref(sma)) {
+ rcu_read_unlock();
+ return -EIDRM;
+ }
+ rcu_read_unlock();
if(nsems > SEMMSL_FAST) {
sem_io = ipc_alloc(sizeof(ushort)*nsems);
@@ -1031,7 +1176,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
}
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma);
+ sem_unlock(sma, -1);
err = -EIDRM;
goto out_free;
}
@@ -1053,9 +1198,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
/* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */
}
err = -EINVAL;
- if(semnum < 0 || semnum >= nsems)
- goto out_unlock;
+ if (semnum < 0 || semnum >= nsems) {
+ rcu_read_unlock();
+ goto out_wakeup;
+ }
+ sem_lock(sma, NULL, -1);
curr = &sma->sem_base[semnum];
switch (cmd) {
@@ -1072,10 +1220,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
err = count_semzcnt(sma,semnum);
goto out_unlock;
}
+
out_unlock:
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+out_wakeup:
wake_up_sem_queue_do(&tasks);
-
out_free:
if(sem_io != fast_sem_io)
ipc_free(sem_io, sizeof(ushort)*nsems);
@@ -1126,33 +1275,39 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
return -EFAULT;
}
- ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd,
- &semid64.sem_perm, 0);
+ ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
+ &semid64.sem_perm, 0);
if (IS_ERR(ipcp))
return PTR_ERR(ipcp);
sma = container_of(ipcp, struct sem_array, sem_perm);
err = security_sem_semctl(sma, cmd);
- if (err)
+ if (err) {
+ rcu_read_unlock();
goto out_unlock;
+ }
switch(cmd){
case IPC_RMID:
+ sem_lock(sma, NULL, -1);
freeary(ns, ipcp);
goto out_up;
case IPC_SET:
+ sem_lock(sma, NULL, -1);
err = ipc_update_perm(&semid64.sem_perm, ipcp);
if (err)
goto out_unlock;
sma->sem_ctime = get_seconds();
break;
default:
+ rcu_read_unlock();
err = -EINVAL;
+ goto out_up;
}
out_unlock:
- sem_unlock(sma);
+ sem_unlock(sma, -1);
out_up:
up_write(&sem_ids(ns).rw_mutex);
return err;
@@ -1264,8 +1419,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
struct sem_array *sma;
struct sem_undo_list *ulp;
struct sem_undo *un, *new;
- int nsems;
- int error;
+ int nsems, error;
error = get_undo_list(&ulp);
if (error)
@@ -1277,16 +1431,22 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
spin_unlock(&ulp->lock);
if (likely(un!=NULL))
goto out;
- rcu_read_unlock();
/* no undo structure around - allocate one. */
/* step 1: figure out the size of the semaphore array */
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
return ERR_CAST(sma);
+ }
nsems = sma->sem_nsems;
- sem_getref_and_unlock(sma);
+ if (!ipc_rcu_getref(sma)) {
+ rcu_read_unlock();
+ un = ERR_PTR(-EIDRM);
+ goto out;
+ }
+ rcu_read_unlock();
/* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
@@ -1298,7 +1458,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
/* step 3: Acquire the lock on semaphore array */
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma);
+ sem_unlock(sma, -1);
kfree(new);
un = ERR_PTR(-EIDRM);
goto out;
@@ -1326,7 +1486,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
success:
spin_unlock(&ulp->lock);
rcu_read_lock();
- sem_unlock(sma);
+ sem_unlock(sma, -1);
out:
return un;
}
@@ -1366,7 +1526,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
struct sembuf fast_sops[SEMOPM_FAST];
struct sembuf* sops = fast_sops, *sop;
struct sem_undo *un;
- int undos = 0, alter = 0, max;
+ int undos = 0, alter = 0, max, locknum;
struct sem_queue queue;
unsigned long jiffies_left = 0;
struct ipc_namespace *ns;
@@ -1410,25 +1570,45 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
alter = 1;
}
+ INIT_LIST_HEAD(&tasks);
+
if (undos) {
+ /* On success, find_alloc_undo takes the rcu_read_lock */
un = find_alloc_undo(ns, semid);
if (IS_ERR(un)) {
error = PTR_ERR(un);
goto out_free;
}
- } else
+ } else {
un = NULL;
+ rcu_read_lock();
+ }
- INIT_LIST_HEAD(&tasks);
-
- sma = sem_lock_check(ns, semid);
+ sma = sem_obtain_object_check(ns, semid);
if (IS_ERR(sma)) {
- if (un)
- rcu_read_unlock();
+ rcu_read_unlock();
error = PTR_ERR(sma);
goto out_free;
}
+ error = -EFBIG;
+ if (max >= sma->sem_nsems) {
+ rcu_read_unlock();
+ goto out_wakeup;
+ }
+
+ error = -EACCES;
+ if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
+ rcu_read_unlock();
+ goto out_wakeup;
+ }
+
+ error = security_sem_semop(sma, sops, nsops, alter);
+ if (error) {
+ rcu_read_unlock();
+ goto out_wakeup;
+ }
+
/*
* semid identifiers are not unique - find_alloc_undo may have
* allocated an undo structure, it was invalidated by an RMID
@@ -1437,33 +1617,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
* "un" itself is guaranteed by rcu.
*/
error = -EIDRM;
- if (un) {
- if (un->semid == -1) {
- rcu_read_unlock();
- goto out_unlock_free;
- } else {
- /*
- * rcu lock can be released, "un" cannot disappear:
- * - sem_lock is acquired, thus IPC_RMID is
- * impossible.
- * - exit_sem is impossible, it always operates on
- * current (or a dead task).
- */
-
- rcu_read_unlock();
- }
- }
-
- error = -EFBIG;
- if (max >= sma->sem_nsems)
- goto out_unlock_free;
-
- error = -EACCES;
- if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
- goto out_unlock_free;
-
- error = security_sem_semop(sma, sops, nsops, alter);
- if (error)
+ locknum = sem_lock(sma, sops, nsops);
+ if (un && un->semid == -1)
goto out_unlock_free;
error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
@@ -1483,21 +1638,20 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
queue.undo = un;
queue.pid = task_tgid_vnr(current);
queue.alter = alter;
- if (alter)
- list_add_tail(&queue.list, &sma->sem_pending);
- else
- list_add(&queue.list, &sma->sem_pending);
if (nsops == 1) {
struct sem *curr;
curr = &sma->sem_base[sops->sem_num];
if (alter)
- list_add_tail(&queue.simple_list, &curr->sem_pending);
+ list_add_tail(&queue.list, &curr->sem_pending);
else
- list_add(&queue.simple_list, &curr->sem_pending);
+ list_add(&queue.list, &curr->sem_pending);
} else {
- INIT_LIST_HEAD(&queue.simple_list);
+ if (alter)
+ list_add_tail(&queue.list, &sma->sem_pending);
+ else
+ list_add(&queue.list, &sma->sem_pending);
sma->complex_count++;
}
@@ -1506,7 +1660,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
sleep_again:
current->state = TASK_INTERRUPTIBLE;
- sem_unlock(sma);
+ sem_unlock(sma, locknum);
if (timeout)
jiffies_left = schedule_timeout(jiffies_left);
@@ -1528,7 +1682,7 @@ sleep_again:
goto out_free;
}
- sma = sem_lock(ns, semid);
+ sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
/*
* Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
@@ -1567,8 +1721,8 @@ sleep_again:
unlink_queue(sma, &queue);
out_unlock_free:
- sem_unlock(sma);
-
+ sem_unlock(sma, locknum);
+out_wakeup:
wake_up_sem_queue_do(&tasks);
out_free:
if(sops != fast_sops)
@@ -1631,8 +1785,7 @@ void exit_sem(struct task_struct *tsk)
struct sem_array *sma;
struct sem_undo *un;
struct list_head tasks;
- int semid;
- int i;
+ int semid, i;
rcu_read_lock();
un = list_entry_rcu(ulp->list_proc.next,
@@ -1641,23 +1794,26 @@ void exit_sem(struct task_struct *tsk)
semid = -1;
else
semid = un->semid;
- rcu_read_unlock();
- if (semid == -1)
+ if (semid == -1) {
+ rcu_read_unlock();
break;
+ }
- sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
-
+ sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
/* exit_sem raced with IPC_RMID, nothing to do */
- if (IS_ERR(sma))
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
continue;
+ }
+ sem_lock(sma, NULL, -1);
un = __lookup_undo(ulp, semid);
if (un == NULL) {
/* exit_sem raced with IPC_RMID+semget() that created
* exactly the same semid. Nothing to do.
*/
- sem_unlock(sma);
+ sem_unlock(sma, -1);
continue;
}
@@ -1697,7 +1853,7 @@ void exit_sem(struct task_struct *tsk)
/* maybe some queued-up processes were waiting for this */
INIT_LIST_HEAD(&tasks);
do_smart_update(sma, NULL, 0, 1, &tasks);
- sem_unlock(sma);
+ sem_unlock(sma, -1);
wake_up_sem_queue_do(&tasks);
kfree_rcu(un, rcu);
diff --git a/ipc/shm.c b/ipc/shm.c
index cb858df061d3..8247c49ec073 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -462,7 +462,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
size_t size = params->u.size;
int error;
struct shmid_kernel *shp;
- int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct file * file;
char name[13];
int id;
diff --git a/ipc/util.c b/ipc/util.c
index 03eadd8fb0fd..579201e4bc01 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -439,9 +439,9 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
* NULL is returned if the allocation fails
*/
-void* ipc_alloc(int size)
+void *ipc_alloc(int size)
{
- void* out;
+ void *out;
if(size > PAGE_SIZE)
out = vmalloc(size);
else
@@ -478,7 +478,7 @@ void ipc_free(void* ptr, int size)
*/
struct ipc_rcu_hdr
{
- int refcount;
+ atomic_t refcount;
int is_vmalloc;
void *data[0];
};
@@ -516,39 +516,41 @@ static inline int rcu_use_vmalloc(int size)
* @size: size desired
*
* Allocate memory for the rcu header structure + the object.
- * Returns the pointer to the object.
- * NULL is returned if the allocation fails.
+ * Returns the pointer to the object or NULL upon failure.
*/
-
-void* ipc_rcu_alloc(int size)
+void *ipc_rcu_alloc(int size)
{
- void* out;
- /*
+ void *out;
+
+ /*
* We prepend the allocation with the rcu struct, and
- * workqueue if necessary (for vmalloc).
+ * workqueue if necessary (for vmalloc).
*/
if (rcu_use_vmalloc(size)) {
out = vmalloc(HDRLEN_VMALLOC + size);
- if (out) {
- out += HDRLEN_VMALLOC;
- container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
- container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
- }
+ if (!out)
+ goto done;
+
+ out += HDRLEN_VMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
} else {
out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
- if (out) {
- out += HDRLEN_KMALLOC;
- container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
- container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
- }
+ if (!out)
+ goto done;
+
+ out += HDRLEN_KMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
}
+ /* set reference counter no matter what kind of allocation was done */
+ atomic_set(&container_of(out, struct ipc_rcu_hdr, data)->refcount, 1);
+done:
return out;
}
-void ipc_rcu_getref(void *ptr)
+int ipc_rcu_getref(void *ptr)
{
- container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+ return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount);
}
static void ipc_do_vfree(struct work_struct *work)
@@ -578,7 +580,7 @@ static void ipc_schedule_free(struct rcu_head *head)
void ipc_rcu_putref(void *ptr)
{
- if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+ if (!atomic_dec_and_test(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount))
return;
if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
@@ -669,38 +671,81 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
}
/**
+ * ipc_obtain_object
+ * @ids: ipc identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and return associated ipc object.
+ *
+ * Call inside the RCU critical section.
+ * The ipc object is *not* locked on exit.
+ */
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
+{
+ struct kern_ipc_perm *out;
+ int lid = ipcid_to_idx(id);
+
+ out = idr_find(&ids->ipcs_idr, lid);
+ if (!out)
+ return ERR_PTR(-EINVAL);
+
+ return out;
+}
+
+/**
* ipc_lock - Lock an ipc structure without rw_mutex held
* @ids: IPC identifier set
* @id: ipc id to look for
*
* Look for an id in the ipc ids idr and lock the associated ipc object.
*
- * The ipc object is locked on exit.
+ * The ipc object is locked on successful exit.
*/
-
struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
rcu_read_lock();
- out = idr_find(&ids->ipcs_idr, lid);
- if (out == NULL) {
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
+ out = ipc_obtain_object(ids, id);
+ if (IS_ERR(out))
+ goto err1;
spin_lock(&out->lock);
-
+
/* ipc_rmid() may have already freed the ID while ipc_lock
* was spinning: here verify that the structure is still valid
*/
- if (out->deleted) {
- spin_unlock(&out->lock);
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
+ if (!out->deleted)
+ return out;
+ spin_unlock(&out->lock);
+ out = ERR_PTR(-EINVAL);
+err1:
+ rcu_read_unlock();
+ return out;
+}
+
+/**
+ * ipc_obtain_object_check
+ * @ids: ipc identifier set
+ * @id: ipc id to look for
+ *
+ * Similar to ipc_obtain_object() but also checks that the
+ * sequence number encoded in @id matches the ipc object.
+ *
+ * Call inside the RCU critical section.
+ * The ipc object is *not* locked on exit.
+ */
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)
+{
+ struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
+
+ if (IS_ERR(out))
+ goto out;
+
+ if (ipc_checkid(out, id))
+ return ERR_PTR(-EIDRM);
+out:
return out;
}
@@ -781,11 +826,28 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
struct ipc64_perm *perm, int extra_perm)
{
struct kern_ipc_perm *ipcp;
+
+ ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
+ if (IS_ERR(ipcp))
+ goto out;
+
+ spin_lock(&ipcp->lock);
+out:
+ return ipcp;
+}
+
+struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+ struct ipc_ids *ids, int id, int cmd,
+ struct ipc64_perm *perm, int extra_perm)
+{
kuid_t euid;
- int err;
+ int err = -EPERM;
+ struct kern_ipc_perm *ipcp;
down_write(&ids->rw_mutex);
- ipcp = ipc_lock_check(ids, id);
+ rcu_read_lock();
+
+ ipcp = ipc_obtain_object_check(ids, id);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_up;
@@ -794,17 +856,21 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
audit_ipc_obj(ipcp);
if (cmd == IPC_SET)
audit_ipc_set_perm(extra_perm, perm->uid,
- perm->gid, perm->mode);
+ perm->gid, perm->mode);
euid = current_euid();
if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) ||
ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ipcp;
- err = -EPERM;
- ipc_unlock(ipcp);
out_up:
+ /*
+ * Lookup or permission check failed: drop the RCU read
+ * lock and rw_mutex, then return the corresponding error.
+ */
+ rcu_read_unlock();
up_write(&ids->rw_mutex);
+
return ERR_PTR(err);
}
diff --git a/ipc/util.h b/ipc/util.h
index eeb79a1fbd83..2b0bdd5d92ce 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -119,14 +119,18 @@ void ipc_free(void* ptr, int size);
* to 0 schedules the rcu destruction. Caller must guarantee locking.
*/
void* ipc_rcu_alloc(int size);
-void ipc_rcu_getref(void *ptr);
+int ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
+struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+ struct ipc_ids *ids, int id, int cmd,
+ struct ipc64_perm *perm, int extra_perm);
struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
@@ -150,14 +154,9 @@ static inline int ipc_buildid(int id, int seq)
return SEQ_MULTIPLIER * seq + id;
}
-/*
- * Must be called with ipcp locked
- */
static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
{
- if (uid / SEQ_MULTIPLIER != ipcp->seq)
- return 1;
- return 0;
+ return uid / SEQ_MULTIPLIER != ipcp->seq;
}
static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
@@ -172,7 +171,13 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
rcu_read_unlock();
}
+static inline void ipc_lock_object(struct kern_ipc_perm *perm)
+{
+ spin_lock(&perm->lock);
+}
+
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params);
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,