summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 12:07:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 12:07:51 -0700
commitb34d8915c413acb51d837a45fb8747b61f65c020 (patch)
treeced5fac166324634653d84b1afe2b958b3904f4d
parente8a89cebdbaab14caaa26debdb4ffd493b8831af (diff)
parentf33ebbe9da2c3c24664a0ad4f8fd83f293547e63 (diff)
downloadlwn-b34d8915c413acb51d837a45fb8747b61f65c020.tar.gz
lwn-b34d8915c413acb51d837a45fb8747b61f65c020.zip
Merge branch 'writable_limits' of git://decibel.fi.muni.cz/~xslaby/linux
* 'writable_limits' of git://decibel.fi.muni.cz/~xslaby/linux: unistd: add __NR_prlimit64 syscall numbers rlimits: implement prlimit64 syscall rlimits: switch more rlimit syscalls to do_prlimit rlimits: redo do_setrlimit to more generic do_prlimit rlimits: add rlimit64 structure rlimits: do security check under task_lock rlimits: allow setrlimit to non-current tasks rlimits: split sys_setrlimit rlimits: selinux, do rlimits changes under task_lock rlimits: make sure ->rlim_max never grows in sys_setrlimit rlimits: add task_struct to update_rlimit_cpu rlimits: security, add task_struct to setrlimit Fix up various system call number conflicts. We not only added fanotify system calls in the meantime, but asm-generic/unistd.h added a wait4 along with a range of reserved per-architecture system calls.
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--include/asm-generic/unistd.h4
-rw-r--r--include/linux/posix-timers.h2
-rw-r--r--include/linux/resource.h9
-rw-r--r--include/linux/security.h9
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--kernel/compat.c17
-rw-r--r--kernel/posix-cpu-timers.c8
-rw-r--r--kernel/sys.c202
-rw-r--r--security/capability.c3
-rw-r--r--security/security.c5
-rw-r--r--security/selinux/hooks.c12
15 files changed, 207 insertions, 75 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 17cf65c94804..91dc4bb13032 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -844,4 +844,5 @@ ia32_sys_call_table:
.quad compat_sys_recvmmsg
.quad sys_fanotify_init
.quad sys32_fanotify_mark
+ .quad sys_prlimit64
ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 80b799cd74f7..b766a5e8ba0e 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -345,10 +345,11 @@
#define __NR_recvmmsg 337
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
+#define __NR_prlimit64 340
#ifdef __KERNEL__
-#define NR_syscalls 340
+#define NR_syscalls 341
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 5b7b1d585616..363e9b8a715b 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -667,6 +667,8 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
#define __NR_fanotify_mark 301
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
+#define __NR_prlimit64 302
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 07ad5eb7cc5c..4802accb9d8d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -339,3 +339,4 @@ ENTRY(sys_call_table)
.long sys_recvmmsg
.long sys_fanotify_init
.long sys_fanotify_mark
+ .long sys_prlimit64
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index c17cebc49952..e1898090f22c 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -640,9 +640,11 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#define __NR_wait4 260
__SYSCALL(__NR_wait4, sys_wait4)
+#define __NR_prlimit64 261
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
#undef __NR_syscalls
-#define __NR_syscalls 261
+#define __NR_syscalls 262
/*
* All syscalls below here should go away really,
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 4f71bf4e628c..3e23844a6990 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -117,6 +117,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
long clock_nanosleep_restart(struct restart_block *restart_block);
-void update_rlimit_cpu(unsigned long rlim_new);
+void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
#endif
diff --git a/include/linux/resource.h b/include/linux/resource.h
index f1e914eefeab..88d36f9145ba 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -43,6 +43,13 @@ struct rlimit {
unsigned long rlim_max;
};
+#define RLIM64_INFINITY (~0ULL)
+
+struct rlimit64 {
+ __u64 rlim_cur;
+ __u64 rlim_max;
+};
+
#define PRIO_MIN (-20)
#define PRIO_MAX 20
@@ -73,6 +80,8 @@ struct rlimit {
struct task_struct;
int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
+int do_prlimit(struct task_struct *tsk, unsigned int resource,
+ struct rlimit *new_rlim, struct rlimit *old_rlim);
#endif /* __KERNEL__ */
diff --git a/include/linux/security.h b/include/linux/security.h
index 5bcb395a49d4..a22219afff09 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1499,7 +1499,8 @@ struct security_operations {
int (*task_setnice) (struct task_struct *p, int nice);
int (*task_setioprio) (struct task_struct *p, int ioprio);
int (*task_getioprio) (struct task_struct *p);
- int (*task_setrlimit) (unsigned int resource, struct rlimit *new_rlim);
+ int (*task_setrlimit) (struct task_struct *p, unsigned int resource,
+ struct rlimit *new_rlim);
int (*task_setscheduler) (struct task_struct *p, int policy,
struct sched_param *lp);
int (*task_getscheduler) (struct task_struct *p);
@@ -1749,7 +1750,8 @@ void security_task_getsecid(struct task_struct *p, u32 *secid);
int security_task_setnice(struct task_struct *p, int nice);
int security_task_setioprio(struct task_struct *p, int ioprio);
int security_task_getioprio(struct task_struct *p);
-int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim);
+int security_task_setrlimit(struct task_struct *p, unsigned int resource,
+ struct rlimit *new_rlim);
int security_task_setscheduler(struct task_struct *p,
int policy, struct sched_param *lp);
int security_task_getscheduler(struct task_struct *p);
@@ -2311,7 +2313,8 @@ static inline int security_task_getioprio(struct task_struct *p)
return 0;
}
-static inline int security_task_setrlimit(unsigned int resource,
+static inline int security_task_setrlimit(struct task_struct *p,
+ unsigned int resource,
struct rlimit *new_rlim)
{
return 0;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ab198a1e38d..1b67bd333b5e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -35,6 +35,7 @@ struct oldold_utsname;
struct old_utsname;
struct pollfd;
struct rlimit;
+struct rlimit64;
struct rusage;
struct sched_param;
struct sel_arg_struct;
@@ -644,6 +645,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
#endif
asmlinkage long sys_setrlimit(unsigned int resource,
struct rlimit __user *rlim);
+asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource,
+ const struct rlimit64 __user *new_rlim,
+ struct rlimit64 __user *old_rlim);
asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
asmlinkage long sys_umask(int mask);
diff --git a/kernel/compat.c b/kernel/compat.c
index 5adab05a3172..e167efce8423 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -279,11 +279,6 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
- int ret;
- mm_segment_t old_fs = get_fs ();
-
- if (resource >= RLIM_NLIMITS)
- return -EINVAL;
if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) ||
__get_user(r.rlim_cur, &rlim->rlim_cur) ||
@@ -294,10 +289,7 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
r.rlim_cur = RLIM_INFINITY;
if (r.rlim_max == COMPAT_RLIM_INFINITY)
r.rlim_max = RLIM_INFINITY;
- set_fs(KERNEL_DS);
- ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
- set_fs(old_fs);
- return ret;
+ return do_prlimit(current, resource, &r, NULL);
}
#ifdef COMPAT_RLIM_OLD_INFINITY
@@ -329,16 +321,13 @@ asmlinkage long compat_sys_old_getrlimit(unsigned int resource,
#endif
-asmlinkage long compat_sys_getrlimit (unsigned int resource,
+asmlinkage long compat_sys_getrlimit(unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
int ret;
- mm_segment_t old_fs = get_fs();
- set_fs(KERNEL_DS);
- ret = sys_getrlimit(resource, (struct rlimit __user *) &r);
- set_fs(old_fs);
+ ret = do_prlimit(current, resource, NULL, &r);
if (!ret) {
if (r.rlim_cur > COMPAT_RLIM_INFINITY)
r.rlim_cur = COMPAT_RLIM_INFINITY;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index f66bdd33a6c6..6842eeba5879 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -16,13 +16,13 @@
* siglock protection since other code may update expiration cache as
* well.
*/
-void update_rlimit_cpu(unsigned long rlim_new)
+void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
cputime_t cputime = secs_to_cputime(rlim_new);
- spin_lock_irq(&current->sighand->siglock);
- set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
- spin_unlock_irq(&current->sighand->siglock);
+ spin_lock_irq(&task->sighand->siglock);
+ set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
+ spin_unlock_irq(&task->sighand->siglock);
}
static int check_clock(const clockid_t which_clock)
diff --git a/kernel/sys.c b/kernel/sys.c
index e83ddbbaf89d..e9ad44489828 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1236,15 +1236,14 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
- if (resource >= RLIM_NLIMITS)
- return -EINVAL;
- else {
- struct rlimit value;
- task_lock(current->group_leader);
- value = current->signal->rlim[resource];
- task_unlock(current->group_leader);
- return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
- }
+ struct rlimit value;
+ int ret;
+
+ ret = do_prlimit(current, resource, NULL, &value);
+ if (!ret)
+ ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+
+ return ret;
}
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
@@ -1272,44 +1271,89 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
#endif
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+static inline bool rlim64_is_infinity(__u64 rlim64)
{
- struct rlimit new_rlim, *old_rlim;
- int retval;
+#if BITS_PER_LONG < 64
+ return rlim64 >= ULONG_MAX;
+#else
+ return rlim64 == RLIM64_INFINITY;
+#endif
+}
+
+static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
+{
+ if (rlim->rlim_cur == RLIM_INFINITY)
+ rlim64->rlim_cur = RLIM64_INFINITY;
+ else
+ rlim64->rlim_cur = rlim->rlim_cur;
+ if (rlim->rlim_max == RLIM_INFINITY)
+ rlim64->rlim_max = RLIM64_INFINITY;
+ else
+ rlim64->rlim_max = rlim->rlim_max;
+}
+
+static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
+{
+ if (rlim64_is_infinity(rlim64->rlim_cur))
+ rlim->rlim_cur = RLIM_INFINITY;
+ else
+ rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
+ if (rlim64_is_infinity(rlim64->rlim_max))
+ rlim->rlim_max = RLIM_INFINITY;
+ else
+ rlim->rlim_max = (unsigned long)rlim64->rlim_max;
+}
+
+/* make sure you are allowed to change @tsk limits before calling this */
+int do_prlimit(struct task_struct *tsk, unsigned int resource,
+ struct rlimit *new_rlim, struct rlimit *old_rlim)
+{
+ struct rlimit *rlim;
+ int retval = 0;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
- return -EFAULT;
- if (new_rlim.rlim_cur > new_rlim.rlim_max)
- return -EINVAL;
- old_rlim = current->signal->rlim + resource;
- if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
- !capable(CAP_SYS_RESOURCE))
- return -EPERM;
- if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
- return -EPERM;
-
- retval = security_task_setrlimit(resource, &new_rlim);
- if (retval)
- return retval;
-
- if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
- /*
- * The caller is asking for an immediate RLIMIT_CPU
- * expiry. But we use the zero value to mean "it was
- * never set". So let's cheat and make it one second
- * instead
- */
- new_rlim.rlim_cur = 1;
+ if (new_rlim) {
+ if (new_rlim->rlim_cur > new_rlim->rlim_max)
+ return -EINVAL;
+ if (resource == RLIMIT_NOFILE &&
+ new_rlim->rlim_max > sysctl_nr_open)
+ return -EPERM;
}
- task_lock(current->group_leader);
- *old_rlim = new_rlim;
- task_unlock(current->group_leader);
-
- if (resource != RLIMIT_CPU)
+ /* protect tsk->signal and tsk->sighand from disappearing */
+ read_lock(&tasklist_lock);
+ if (!tsk->sighand) {
+ retval = -ESRCH;
goto out;
+ }
+
+ rlim = tsk->signal->rlim + resource;
+ task_lock(tsk->group_leader);
+ if (new_rlim) {
+ if (new_rlim->rlim_max > rlim->rlim_max &&
+ !capable(CAP_SYS_RESOURCE))
+ retval = -EPERM;
+ if (!retval)
+ retval = security_task_setrlimit(tsk->group_leader,
+ resource, new_rlim);
+ if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
+ /*
+ * The caller is asking for an immediate RLIMIT_CPU
+ * expiry. But we use the zero value to mean "it was
+ * never set". So let's cheat and make it one second
+ * instead
+ */
+ new_rlim->rlim_cur = 1;
+ }
+ }
+ if (!retval) {
+ if (old_rlim)
+ *old_rlim = *rlim;
+ if (new_rlim)
+ *rlim = *new_rlim;
+ }
+ task_unlock(tsk->group_leader);
/*
* RLIMIT_CPU handling. Note that the kernel fails to return an error
@@ -1317,14 +1361,84 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
* very long-standing error, and fixing it now risks breakage of
* applications, so we live with it
*/
- if (new_rlim.rlim_cur == RLIM_INFINITY)
- goto out;
-
- update_rlimit_cpu(new_rlim.rlim_cur);
+ if (!retval && new_rlim && resource == RLIMIT_CPU &&
+ new_rlim->rlim_cur != RLIM_INFINITY)
+ update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
+ read_unlock(&tasklist_lock);
+ return retval;
+}
+
+/* rcu lock must be held */
+static int check_prlimit_permission(struct task_struct *task)
+{
+ const struct cred *cred = current_cred(), *tcred;
+
+ tcred = __task_cred(task);
+ if ((cred->uid != tcred->euid ||
+ cred->uid != tcred->suid ||
+ cred->uid != tcred->uid ||
+ cred->gid != tcred->egid ||
+ cred->gid != tcred->sgid ||
+ cred->gid != tcred->gid) &&
+ !capable(CAP_SYS_RESOURCE)) {
+ return -EPERM;
+ }
+
return 0;
}
+SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
+ const struct rlimit64 __user *, new_rlim,
+ struct rlimit64 __user *, old_rlim)
+{
+ struct rlimit64 old64, new64;
+ struct rlimit old, new;
+ struct task_struct *tsk;
+ int ret;
+
+ if (new_rlim) {
+ if (copy_from_user(&new64, new_rlim, sizeof(new64)))
+ return -EFAULT;
+ rlim64_to_rlim(&new64, &new);
+ }
+
+ rcu_read_lock();
+ tsk = pid ? find_task_by_vpid(pid) : current;
+ if (!tsk) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
+ ret = check_prlimit_permission(tsk);
+ if (ret) {
+ rcu_read_unlock();
+ return ret;
+ }
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
+ old_rlim ? &old : NULL);
+
+ if (!ret && old_rlim) {
+ rlim_to_rlim64(&old, &old64);
+ if (copy_to_user(old_rlim, &old64, sizeof(old64)))
+ ret = -EFAULT;
+ }
+
+ put_task_struct(tsk);
+ return ret;
+}
+
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+ struct rlimit new_rlim;
+
+ if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+ return -EFAULT;
+ return do_prlimit(current, resource, &new_rlim, NULL);
+}
+
/*
* It would make sense to put struct rusage in the task_struct,
* except that would make the task_struct be *really big*. After
diff --git a/security/capability.c b/security/capability.c
index a0bbf30fb6dc..95a6599a37bb 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -411,7 +411,8 @@ static int cap_task_getioprio(struct task_struct *p)
return 0;
}
-static int cap_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
+static int cap_task_setrlimit(struct task_struct *p, unsigned int resource,
+ struct rlimit *new_rlim)
{
return 0;
}
diff --git a/security/security.c b/security/security.c
index 7461b1bc296c..c53949f17d9e 100644
--- a/security/security.c
+++ b/security/security.c
@@ -780,9 +780,10 @@ int security_task_getioprio(struct task_struct *p)
return security_ops->task_getioprio(p);
}
-int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
+int security_task_setrlimit(struct task_struct *p, unsigned int resource,
+ struct rlimit *new_rlim)
{
- return security_ops->task_setrlimit(resource, new_rlim);
+ return security_ops->task_setrlimit(p, resource, new_rlim);
}
int security_task_setscheduler(struct task_struct *p,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 9b40f4c0ac70..42043f96e54f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2284,12 +2284,15 @@ static void selinux_bprm_committing_creds(struct linux_binprm *bprm)
rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS,
PROCESS__RLIMITINH, NULL);
if (rc) {
+ /* protect against do_prlimit() */
+ task_lock(current);
for (i = 0; i < RLIM_NLIMITS; i++) {
rlim = current->signal->rlim + i;
initrlim = init_task.signal->rlim + i;
rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
}
- update_rlimit_cpu(current->signal->rlim[RLIMIT_CPU].rlim_cur);
+ task_unlock(current);
+ update_rlimit_cpu(current, rlimit(RLIMIT_CPU));
}
}
@@ -3333,16 +3336,17 @@ static int selinux_task_getioprio(struct task_struct *p)
return current_has_perm(p, PROCESS__GETSCHED);
}
-static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
+static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource,
+ struct rlimit *new_rlim)
{
- struct rlimit *old_rlim = current->signal->rlim + resource;
+ struct rlimit *old_rlim = p->signal->rlim + resource;
/* Control the ability to change the hard limit (whether
lowering or raising it), so that the hard limit can
later be used as a safe reset point for the soft limit
upon context transitions. See selinux_bprm_committing_creds. */
if (old_rlim->rlim_max != new_rlim->rlim_max)
- return current_has_perm(current, PROCESS__SETRLIMIT);
+ return current_has_perm(p, PROCESS__SETRLIMIT);
return 0;
}