From eab172294d5e24464f332dd8e94a57a9819c81c4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 17:03:22 +0800 Subject: sched: cleanup for alloc_rt/fair_sched_group() Impact: cleanup Remove checking parent == NULL. It won't be NULLL, because we dynamically create sub task_group only, and sub task_group always has its parent. (root task_group is statically defined) Also replace kmalloc_node(GFP_ZERO) with kzalloc_node(). Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar --- kernel/sched.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index e8819bc6f462..7dd6c860773b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8472,7 +8472,7 @@ static int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { struct cfs_rq *cfs_rq; - struct sched_entity *se, *parent_se; + struct sched_entity *se; struct rq *rq; int i; @@ -8488,18 +8488,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) for_each_possible_cpu(i) { rq = cpu_rq(i); - cfs_rq = kmalloc_node(sizeof(struct cfs_rq), - GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); + cfs_rq = kzalloc_node(sizeof(struct cfs_rq), + GFP_KERNEL, cpu_to_node(i)); if (!cfs_rq) goto err; - se = kmalloc_node(sizeof(struct sched_entity), - GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); + se = kzalloc_node(sizeof(struct sched_entity), + GFP_KERNEL, cpu_to_node(i)); if (!se) goto err; - parent_se = parent ? parent->se[i] : NULL; - init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se); + init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); } return 1; @@ -8560,7 +8559,7 @@ static int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) { struct rt_rq *rt_rq; - struct sched_rt_entity *rt_se, *parent_se; + struct sched_rt_entity *rt_se; struct rq *rq; int i; @@ -8577,18 +8576,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) for_each_possible_cpu(i) { rq = cpu_rq(i); - rt_rq = kmalloc_node(sizeof(struct rt_rq), - GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); + rt_rq = kzalloc_node(sizeof(struct rt_rq), + GFP_KERNEL, cpu_to_node(i)); if (!rt_rq) goto err; - rt_se = kmalloc_node(sizeof(struct sched_rt_entity), - GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); + rt_se = kzalloc_node(sizeof(struct sched_rt_entity), + GFP_KERNEL, cpu_to_node(i)); if (!rt_se) goto err; - parent_se = parent ? parent->rt_se[i] : NULL; - init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se); + init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); } return 1; -- cgit v1.2.3 From 34f3a814eef8069a24e5b3ebcf27aba9dabac2ea Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 30 Oct 2008 15:23:32 +0800 Subject: sched: switch sched_features to seqfile Impact: cleanup So handling of sched_features read is simplified. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 52 ++++++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 7dd6c860773b..5419df9cc5c4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -703,45 +703,18 @@ static __read_mostly char *sched_feat_names[] = { #undef SCHED_FEAT -static int sched_feat_open(struct inode *inode, struct file *filp) +static int sched_feat_show(struct seq_file *m, void *v) { - filp->private_data = inode->i_private; - return 0; -} - -static ssize_t -sched_feat_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char *buf; - int r = 0; - int len = 0; int i; for (i = 0; sched_feat_names[i]; i++) { - len += strlen(sched_feat_names[i]); - len += 4; + if (!(sysctl_sched_features & (1UL << i))) + seq_puts(m, "NO_"); + seq_printf(m, "%s ", sched_feat_names[i]); } + seq_puts(m, "\n"); - buf = kmalloc(len + 2, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - for (i = 0; sched_feat_names[i]; i++) { - if (sysctl_sched_features & (1UL << i)) - r += sprintf(buf + r, "%s ", sched_feat_names[i]); - else - r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]); - } - - r += sprintf(buf + r, "\n"); - WARN_ON(r >= len + 2); - - r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); - - kfree(buf); - - return r; + return 0; } static ssize_t @@ -786,10 +759,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf, return cnt; } +static int sched_feat_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_feat_show, NULL); +} + static struct file_operations sched_feat_fops = { - .open = sched_feat_open, - .read = sched_feat_read, - .write = sched_feat_write, + .open = sched_feat_open, + .write = sched_feat_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; static __init int sched_init_debug(void) -- cgit v1.2.3 From 8bb8c4386d08f2cc5d871d22f220d35032213f84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Nov 2008 00:13:49 +0100 Subject: sched, ftrace: trace sched.c Impact: allow function tracing within sched.c Its useful to see what happens in sched.c. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index 9a3ec66a9d84..e1af03972148 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,7 +21,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg -CFLAGS_REMOVE_sched.o = -mno-spe -pg endif obj-$(CONFIG_FREEZER) += freezer.o -- cgit v1.2.3 From e113a745f693af196c8081b328bf42def086989b Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 31 Oct 2008 08:03:41 -0500 Subject: sched/rt: small optimization to update_curr_rt() Impact: micro-optimization to SCHED_FIFO/RR scheduling A very minor improvement, but might it be better to check sched_rt_runtime(rt_rq) before taking the rt_runtime_lock? Peter Zijlstra observes: > Yes, I think its ok to do so. > > Like pointed out in the other thread, there are two races: > > - sched_rt_runtime() going to RUNTIME_INF, and that will be handled > properly by sched_rt_runtime_exceeded() > > - sched_rt_runtime() going to !RUNTIME_INF, and here we can miss an > accounting cycle, but I don't think that is something to worry too > much about. Signed-off-by: Dimitri Sivanich Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar -- kernel/sched_rt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- kernel/sched_rt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d9ba9d5f99d6..c7963d5d0625 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq) for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); - spin_lock(&rt_rq->rt_runtime_lock); if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { + spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; if (sched_rt_runtime_exceeded(rt_rq)) resched_task(curr); + spin_unlock(&rt_rq->rt_runtime_lock); } - spin_unlock(&rt_rq->rt_runtime_lock); } } -- cgit v1.2.3 From eefd796a8e831408ce17e633d73d70430748c47a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 4 Nov 2008 16:15:37 +0800 Subject: sched debug: remove sd_level_to_string() Impact: cleanup Just use the newly introduced sd->name. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 5419df9cc5c4..7ac59bae87d2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6602,28 +6602,6 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG -static inline const char *sd_level_to_string(enum sched_domain_level lvl) -{ - switch (lvl) { - case SD_LV_NONE: - return "NONE"; - case SD_LV_SIBLING: - return "SIBLING"; - case SD_LV_MC: - return "MC"; - case SD_LV_CPU: - return "CPU"; - case SD_LV_NODE: - return "NODE"; - case SD_LV_ALLNODES: - return "ALLNODES"; - case SD_LV_MAX: - return "MAX"; - - } - return "MAX"; -} - static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpumask_t *groupmask) { @@ -6643,8 +6621,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, return -1; } - printk(KERN_CONT "span %s level %s\n", - str, sd_level_to_string(sd->level)); + printk(KERN_CONT "span %s level %s\n", str, sd->name); if (!cpu_isset(cpu, sd->span)) { printk(KERN_ERR "ERROR: domain->span does not contain " -- cgit v1.2.3 From 0a0db8f5c9d4bbb9bbfcc2b6cb6bce2d0ef4d73d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 4 Nov 2008 16:17:05 +0800 Subject: sched debug: remove NULL checking in print_cfs/rt_rq() Impact: cleanup cfs->tg is initialized in init_tg_cfs_entry() with tg != NULL, and will never be invalidated to NULL. And the underlying cgroup of a valid task_group is always valid. Same for rt->tg. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5ae17762ec32..d25cefe3f0eb 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -121,14 +121,9 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) char path[128] = ""; - struct cgroup *cgroup = NULL; struct task_group *tg = cfs_rq->tg; - if (tg) - cgroup = tg->css.cgroup; - - if (cgroup) - cgroup_path(cgroup, path, sizeof(path)); + cgroup_path(tg->css.cgroup, path, sizeof(path)); SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); #else @@ -193,14 +188,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) char path[128] = ""; - struct cgroup *cgroup = NULL; struct task_group *tg = rt_rq->tg; - if (tg) - cgroup = tg->css.cgroup; - - if (cgroup) - cgroup_path(cgroup, path, sizeof(path)); + cgroup_path(tg->css.cgroup, path, sizeof(path)); SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); #else -- cgit v1.2.3 From a17e2260926f681a0eb983c1e3cb859ba2064bce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 4 Nov 2008 16:19:13 +0800 Subject: sched: remove redundant call to unregister_sched_domain_sysctl() Impact: cleanup The sysctl has been unregistered by partition_sched_domains(). Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 7ac59bae87d2..3cb94fad33ca 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7691,8 +7691,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) cpumask_t tmpmask; int i; - unregister_sched_domain_sysctl(); - for_each_cpu_mask_nr(i, *cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); -- cgit v1.2.3 From faa2f98f856e89d1afb6e4a91707284d242e816e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 4 Nov 2008 16:20:23 +0800 Subject: sched: add sanity check in partition_sched_domains() Impact: cleanup, add debug check It's wrong to make dattr_new = NULL if doms_new == NULL, it introduces memory leak if dattr_new != NULL. Fortunately dattr_new is always NULL in this case. So remove the code and add a sanity check. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3cb94fad33ca..213cad5e50aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7767,7 +7767,7 @@ match1: ndoms_cur = 0; doms_new = &fallback_doms; cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); - dattr_new = NULL; + WARN_ON_ONCE(dattr_new); } /* Build new domains */ -- cgit v1.2.3 From 1f29fae29709b4668979e244c09b2fa78ff1ad59 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 5 Nov 2008 16:08:52 -0600 Subject: file capabilities: add no_file_caps switch (v4) Add a no_file_caps boot option when file capabilities are compiled into the kernel (CONFIG_SECURITY_FILE_CAPABILITIES=y). This allows distributions to ship a kernel with file capabilities compiled in, without forcing users to use (and understand and trust) them. When no_file_caps is specified at boot, then when a process executes a file, any file capabilities stored with that file will not be used in the calculation of the process' new capability sets. This means that booting with the no_file_caps boot option will not be the same as booting a kernel with file capabilities compiled out - in particular a task with CAP_SETPCAP will not have any chance of passing capabilities to another task (which isn't "really" possible anyway, and which may soon by killed altogether by David Howells in any case), and it will instead be able to put new capabilities in its pI. However since fI will always be empty and pI is masked with fI, it gains the task nothing. We also support the extra prctl options, setting securebits and dropping capabilities from the per-process bounding set. The other remaining difference is that killpriv, task_setscheduler, setioprio, and setnice will continue to be hooked. That will be noticable in the case where a root task changed its uid while keeping some caps, and another task owned by the new uid tries to change settings for the more privileged task. Changelog: Nov 05 2008: (v4) trivial port on top of always-start-\ with-clear-caps patch Sep 23 2008: nixed file_caps_enabled when file caps are not compiled in as it isn't used. Document no_file_caps in kernel-parameters.txt. Signed-off-by: Serge Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- Documentation/kernel-parameters.txt | 4 ++++ include/linux/capability.h | 3 +++ kernel/capability.c | 11 +++++++++++ security/commoncap.c | 3 +++ 4 files changed, 21 insertions(+) (limited to 'kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..784443acca9c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1459,6 +1459,10 @@ and is between 256 and 4096 characters. It is defined in the file instruction doesn't work correctly and not to use it. + no_file_caps Tells the kernel not to honor file capabilities. The + only way then for a file to be executed with privilege + is to be setuid root or executed by root. + nohalt [IA-64] Tells the kernel not to use the power saving function PAL_HALT_LIGHT when idle. This increases power-consumption. On the positive side, it reduces diff --git a/include/linux/capability.h b/include/linux/capability.h index 9d1fe30b6f6c..5bc145bd759a 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -68,6 +68,9 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32 VFS_CAP_U32_2 #define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +extern int file_caps_enabled; +#endif struct vfs_cap_data { __le32 magic_etc; /* Little endian */ diff --git a/kernel/capability.c b/kernel/capability.c index 33e51e78c2d8..e13a68535ad5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -33,6 +33,17 @@ EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); EXPORT_SYMBOL(__cap_init_eff_set); +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +int file_caps_enabled = 1; + +static int __init file_caps_disable(char *str) +{ + file_caps_enabled = 0; + return 1; +} +__setup("no_file_caps", file_caps_disable); +#endif + /* * More recent versions of libcap are available from: * diff --git a/security/commoncap.c b/security/commoncap.c index 3976613db829..f88119cb2bc2 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -281,6 +281,9 @@ static int get_file_caps(struct linux_binprm *bprm) bprm_clear_caps(bprm); + if (!file_caps_enabled) + return 0; + if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) return 0; -- cgit v1.2.3 From cf7f8690e864c6fe11e77202dd847fa60f483418 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Wed, 5 Nov 2008 18:57:14 +0530 Subject: sched, lockdep: inline double_unlock_balance() We have a test case which measures the variation in the amount of time needed to perform a fixed amount of work on the preempt_rt kernel. We started seeing deterioration in it's performance recently. The test should never take more than 10 microseconds, but we started 5-10% failure rate. Using elimination method, we traced the problem to commit 1b12bbc747560ea68bcc132c3d05699e52271da0 (lockdep: re-annotate scheduler runqueues). When LOCKDEP is disabled, this patch only adds an additional function call to double_unlock_balance(). Hence I inlined double_unlock_balance() and the problem went away. Here is a patch to make this change. Signed-off-by: Sripathi Kodi Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- kernel/sched_rt.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index e8819bc6f462..ad10d0aae1d7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2825,7 +2825,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) return ret; } -static void double_unlock_balance(struct rq *this_rq, struct rq *busiest) +static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { spin_unlock(&busiest->lock); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c7963d5d0625..2bdd44423599 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -910,7 +910,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) #define RT_MAX_TRIES 3 static int double_lock_balance(struct rq *this_rq, struct rq *busiest); -static void double_unlock_balance(struct rq *this_rq, struct rq *busiest); +static inline void double_unlock_balance(struct rq *this_rq, + struct rq *busiest); static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); -- cgit v1.2.3 From 6d21cd62516a9697cb7ec33cc52e6b814fb65a13 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Nov 2008 17:03:18 +0800 Subject: sched: clean up SCHED_CPUMASK_ALLOC Impact: cleanup The #if/#endif is ugly. Change SCHED_CPUMASK_ALLOC and SCHED_CPUMASK_FREE to static inline functions. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index b24e57a10f6f..59db86c915f9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7279,13 +7279,21 @@ struct allmasks { }; #if NR_CPUS > 128 -#define SCHED_CPUMASK_ALLOC 1 -#define SCHED_CPUMASK_FREE(v) kfree(v) -#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v +#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ + *masks = kmalloc(sizeof(**masks), GFP_KERNEL); +} +static inline void sched_cpumask_free(struct allmasks *masks) +{ + kfree(masks); +} #else -#define SCHED_CPUMASK_ALLOC 0 -#define SCHED_CPUMASK_FREE(v) -#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v +#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ } +static inline void sched_cpumask_free(struct allmasks *masks) +{ } #endif #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ @@ -7361,9 +7369,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map, return -ENOMEM; } -#if SCHED_CPUMASK_ALLOC /* get space for all scratch cpumask variables */ - allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL); + sched_cpumask_alloc(&allmasks); if (!allmasks) { printk(KERN_WARNING "Cannot alloc cpumask array\n"); kfree(rd); @@ -7372,7 +7379,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #endif return -ENOMEM; } -#endif + tmpmask = (cpumask_t *)allmasks; @@ -7626,13 +7633,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map, cpu_attach_domain(sd, rd, i); } - SCHED_CPUMASK_FREE((void *)allmasks); + sched_cpumask_free(allmasks); return 0; #ifdef CONFIG_NUMA error: free_sched_groups(cpu_map, tmpmask); - SCHED_CPUMASK_FREE((void *)allmasks); + sched_cpumask_free(allmasks); kfree(rd); return -ENOMEM; #endif -- cgit v1.2.3 From ae1e9130bfb9ad55eb97ec3fb17a122b7a118f98 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Nov 2008 09:05:16 +0100 Subject: sched: rename SCHED_NO_NO_OMIT_FRAME_POINTER => SCHED_OMIT_FRAME_POINTER Impact: cleanup, change .config option name We had this ugly config name for a long time for hysteric raisons. Rename it to a saner name. We still cannot get rid of it completely, until /proc//stack usage replaces WCHAN usage for good. We'll be able to do that in the v2.6.29/v2.6.30 timeframe. Signed-off-by: Ingo Molnar --- arch/ia64/Kconfig | 2 +- arch/m32r/Kconfig | 2 +- arch/mips/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/x86/Kconfig | 2 +- include/asm-m32r/system.h | 2 +- kernel/Makefile | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 27eec71429b0..59d12788b60c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -99,7 +99,7 @@ config GENERIC_IOMAP bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index dbaed4a63815..29047d5c259a 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f4af967a6b30..a5255e7c79e0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a4de93..adb23ea1c1ef 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -141,7 +141,7 @@ config GENERIC_NVRAM bool default y if PPC32 -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d5550d19b66..74db682ec1ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -364,7 +364,7 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" depends on X86 diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h index 70a57c8c002b..c980f5ba8de7 100644 --- a/include/asm-m32r/system.h +++ b/include/asm-m32r/system.h @@ -23,7 +23,7 @@ */ #if defined(CONFIG_FRAME_POINTER) || \ - !defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER) + !defined(CONFIG_SCHED_OMIT_FRAME_POINTER) #define M32R_PUSH_FP " push fp\n" #define M32R_POP_FP " pop fp\n" #else diff --git a/kernel/Makefile b/kernel/Makefile index e1af03972148..46e67a398495 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,7 +91,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o -ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) +ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond # me. I suspect most platforms don't need this, but until we know that for sure -- cgit v1.2.3 From ff9b48c3598732926fa09afd7f526981c32a48cc Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 10 Nov 2008 21:34:09 +0530 Subject: sched: include group statistics in /proc/sched_debug Impact: extend /proc/sched_debug info Since the statistics of a group entity isn't exported directly from the kernel, it becomes difficult to obtain some of the group statistics. For example, the current method to obtain exec time of a group entity is not always accurate. One has to read the exec times of all the tasks(/proc//sched) in the group and add them. This method fails (or becomes difficult) if we want to collect stats of a group over a duration where tasks get created and terminated. This patch makes it easier to obtain group stats by directly including them in /proc/sched_debug. Stats like group exec time would help user programs (like LTP) to accurately measure the group fairness. An example output of group stats from /proc/sched_debug: cfs_rq[3]:/3/a/1 .exec_clock : 89.598007 .MIN_vruntime : 0.000001 .min_vruntime : 256300.970506 .max_vruntime : 0.000001 .spread : 0.000000 .spread0 : -25373.372248 .nr_running : 0 .load : 0 .yld_exp_empty : 0 .yld_act_empty : 0 .yld_both_empty : 0 .yld_count : 4474 .sched_switch : 0 .sched_count : 40507 .sched_goidle : 12686 .ttwu_count : 15114 .ttwu_local : 11950 .bkl_count : 67 .nr_spread_over : 0 .shares : 0 .se->exec_start : 113676.727170 .se->vruntime : 1592.612714 .se->sum_exec_runtime : 89.598007 .se->wait_start : 0.000000 .se->sleep_start : 0.000000 .se->block_start : 0.000000 .se->sleep_max : 0.000000 .se->block_max : 0.000000 .se->exec_max : 1.000282 .se->slice_max : 1.999750 .se->wait_max : 54.981093 .se->wait_sum : 217.610521 .se->wait_count : 50 .se->load.weight : 2 Signed-off-by: Bharata B Rao Acked-by: Srivatsa Vaddagiri Acked-by: Dhaval Giani Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index d25cefe3f0eb..3625a6598699 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec) #define SPLIT_NS(x) nsec_high(x), nsec_low(x) +#ifdef CONFIG_FAIR_GROUP_SCHED +static void print_cfs_group_stats(struct seq_file *m, int cpu, + struct task_group *tg) +{ + struct sched_entity *se = tg->se[cpu]; + if (!se) + return; + +#define P(F) \ + SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) +#define PN(F) \ + SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) + + PN(se->exec_start); + PN(se->vruntime); + PN(se->sum_exec_runtime); +#ifdef CONFIG_SCHEDSTATS + PN(se->wait_start); + PN(se->sleep_start); + PN(se->block_start); + PN(se->sleep_max); + PN(se->block_max); + PN(se->exec_max); + PN(se->slice_max); + PN(se->wait_max); + PN(se->wait_sum); + P(se->wait_count); +#endif + P(se->load.weight); +#undef PN +#undef P +} +#endif + static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { @@ -181,6 +215,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #ifdef CONFIG_SMP SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); #endif + print_cfs_group_stats(m, cpu, cfs_rq->tg); #endif } @@ -261,7 +296,7 @@ static int sched_debug_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); -- cgit v1.2.3 From 851f7ff56d9c21272f289dd85fb3f1b6cf7a6e10 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:14 +1100 Subject: This patch will print cap_permitted and cap_inheritable data in the PATH records of any file that has file capabilities set. Files which do not have fcaps set will not have different PATH records. An example audit record if you run: setcap "cap_net_admin+pie" /bin/bash /bin/bash type=SYSCALL msg=audit(1225741937.363:230): arch=c000003e syscall=59 success=yes exit=0 a0=2119230 a1=210da30 a2=20ee290 a3=8 items=2 ppid=2149 pid=2923 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=EXECVE msg=audit(1225741937.363:230): argc=2 a0="ping" a1="www.google.com" type=CWD msg=audit(1225741937.363:230): cwd="/root" type=PATH msg=audit(1225741937.363:230): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0104755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fi=0000000000002000 cap_fe=1 cap_fver=2 type=PATH msg=audit(1225741937.363:230): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0 Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 5 +++ kernel/auditsc.c | 82 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/include/linux/capability.h b/include/linux/capability.h index d567af247ed8..0f1950181102 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -53,6 +53,7 @@ typedef struct __user_cap_data_struct { #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX #define VFS_CAP_REVISION_MASK 0xFF000000 +#define VFS_CAP_REVISION_SHIFT 24 #define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK #define VFS_CAP_FLAGS_EFFECTIVE 0x000001 @@ -534,6 +535,10 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); extern int capable(int cap); +/* audit system wants to get cap info from files as well */ +struct dentry; +extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cf5bc2f5f9c3..de7e9bcba9ae 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -65,6 +65,7 @@ #include #include #include +#include #include "audit.h" @@ -84,6 +85,15 @@ int audit_n_rules; /* determines whether we collect data for signals sent */ int audit_signals; +struct audit_cap_data { + kernel_cap_t permitted; + kernel_cap_t inheritable; + union { + unsigned int fE; /* effective bit of a file capability */ + kernel_cap_t effective; /* effective set of a process */ + }; +}; + /* When fs/namei.c:getname() is called, we store the pointer in name and * we don't let putname() free it (instead we free all of the saved * pointers at syscall exit time). @@ -100,6 +110,8 @@ struct audit_names { gid_t gid; dev_t rdev; u32 osid; + struct audit_cap_data fcap; + unsigned int fcap_ver; }; struct audit_aux_data { @@ -1171,6 +1183,35 @@ static void audit_log_execve_info(struct audit_context *context, kfree(buf); } +static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) +{ + int i; + + audit_log_format(ab, " %s=", prefix); + CAP_FOR_EACH_U32(i) { + audit_log_format(ab, "%08x", cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]); + } +} + +static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) +{ + kernel_cap_t *perm = &name->fcap.permitted; + kernel_cap_t *inh = &name->fcap.inheritable; + int log = 0; + + if (!cap_isclear(*perm)) { + audit_log_cap(ab, "cap_fp", perm); + log = 1; + } + if (!cap_isclear(*inh)) { + audit_log_cap(ab, "cap_fi", inh); + log = 1; + } + + if (log) + audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver); +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { int i, call_panic = 0; @@ -1421,6 +1462,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } } + audit_log_fcaps(ab, n); + audit_log_end(ab); } @@ -1787,8 +1830,36 @@ static int audit_inc_name_count(struct audit_context *context, return 0; } + +static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry) +{ + struct cpu_vfs_cap_data caps; + int rc; + + memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t)); + memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t)); + name->fcap.fE = 0; + name->fcap_ver = 0; + + if (!dentry) + return 0; + + rc = get_vfs_caps_from_disk(dentry, &caps); + if (rc) + return rc; + + name->fcap.permitted = caps.permitted; + name->fcap.inheritable = caps.inheritable; + name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; + + return 0; +} + + /* Copy inode data into an audit_names. */ -static void audit_copy_inode(struct audit_names *name, const struct inode *inode) +static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, + const struct inode *inode) { name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; @@ -1797,6 +1868,7 @@ static void audit_copy_inode(struct audit_names *name, const struct inode *inode name->gid = inode->i_gid; name->rdev = inode->i_rdev; security_inode_getsecid(inode, &name->osid); + audit_copy_fcaps(name, dentry); } /** @@ -1831,7 +1903,7 @@ void __audit_inode(const char *name, const struct dentry *dentry) context->names[idx].name = NULL; } handle_path(dentry); - audit_copy_inode(&context->names[idx], inode); + audit_copy_inode(&context->names[idx], dentry, inode); } /** @@ -1892,7 +1964,7 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry, if (!strcmp(dname, n->name) || !audit_compare_dname_path(dname, n->name, &dirlen)) { if (inode) - audit_copy_inode(n, inode); + audit_copy_inode(n, NULL, inode); else n->ino = (unsigned long)-1; found_child = n->name; @@ -1906,7 +1978,7 @@ add_names: return; idx = context->name_count - 1; context->names[idx].name = NULL; - audit_copy_inode(&context->names[idx], parent); + audit_copy_inode(&context->names[idx], NULL, parent); } if (!found_child) { @@ -1927,7 +1999,7 @@ add_names: } if (inode) - audit_copy_inode(&context->names[idx], inode); + audit_copy_inode(&context->names[idx], NULL, inode); else context->names[idx].ino = (unsigned long)-1; } -- cgit v1.2.3 From 3fc689e96c0c90b6fede5946d6c31075e9464f69 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:18 +1100 Subject: Any time fcaps or a setuid app under SECURE_NOROOT is used to result in a non-zero pE we will crate a new audit record which contains the entire set of known information about the executable in question, fP, fI, fE, fversion and includes the process's pE, pI, pP. Before and after the bprm capability are applied. This record type will only be emitted from execve syscalls. an example of making ping use fcaps instead of setuid: setcap "cat_net_raw+pe" /bin/ping type=SYSCALL msg=audit(1225742021.015:236): arch=c000003e syscall=59 success=yes exit=0 a0=1457f30 a1=14606b0 a2=1463940 a3=321b770a70 items=2 ppid=2929 pid=2963 auid=0 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1321] msg=audit(1225742021.015:236): fver=2 fp=0000000000002000 fi=0000000000000000 fe=1 old_pp=0000000000000000 old_pi=0000000000000000 old_pe=0000000000000000 new_pp=0000000000002000 new_pi=0000000000000000 new_pe=0000000000002000 type=EXECVE msg=audit(1225742021.015:236): argc=2 a0="ping" a1="127.0.0.1" type=CWD msg=audit(1225742021.015:236): cwd="/home/test" type=PATH msg=audit(1225742021.015:236): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fe=1 cap_fver=2 type=PATH msg=audit(1225742021.015:236): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0 Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 26 ++++++++++++++++++++ kernel/auditsc.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ security/commoncap.c | 23 ++++++++++++++++- 3 files changed, 116 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43c..8cfb9feb2a05 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -99,6 +99,7 @@ #define AUDIT_OBJ_PID 1318 /* ptrace target */ #define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_EOE 1320 /* End of multi-record event */ +#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -452,6 +453,7 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); +extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -501,6 +503,29 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) return __audit_mq_getsetattr(mqdes, mqstat); return 0; } + +/* + * ieieeeeee, an audit function without a return code! + * + * This function might fail! I decided that it didn't matter. We are too late + * to fail the syscall and the information isn't REQUIRED for any purpose. It's + * just nice to have. We should be able to look at past audit logs to figure + * out this process's current cap set along with the fcaps from the PATH record + * and use that to come up with the final set. Yeah, its ugly, but all the info + * is still in the audit log. So I'm not going to bother mentioning we failed + * if we couldn't allocate memory. + * + * If someone changes their mind they could create the aux record earlier and + * then search here and use that earlier allocation. But I don't wanna. + * + * -Eric + */ +static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +{ + if (unlikely(!audit_dummy_context())) + __audit_log_bprm_fcaps(bprm, pP, pE); +} + extern int audit_n_rules; extern int audit_signals; #else @@ -532,6 +557,7 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) +#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index de7e9bcba9ae..3229cd4206f5 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -196,6 +196,14 @@ struct audit_aux_data_pids { int pid_count; }; +struct audit_aux_data_bprm_fcaps { + struct audit_aux_data d; + struct audit_cap_data fcap; + unsigned int fcap_ver; + struct audit_cap_data old_pcap; + struct audit_cap_data new_pcap; +}; + struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; @@ -1375,6 +1383,20 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]); break; } + case AUDIT_BPRM_FCAPS: { + struct audit_aux_data_bprm_fcaps *axs = (void *)aux; + audit_log_format(ab, "fver=%x", axs->fcap_ver); + audit_log_cap(ab, "fp", &axs->fcap.permitted); + audit_log_cap(ab, "fi", &axs->fcap.inheritable); + audit_log_format(ab, " fe=%d", axs->fcap.fE); + audit_log_cap(ab, "old_pp", &axs->old_pcap.permitted); + audit_log_cap(ab, "old_pi", &axs->old_pcap.inheritable); + audit_log_cap(ab, "old_pe", &axs->old_pcap.effective); + audit_log_cap(ab, "new_pp", &axs->new_pcap.permitted); + audit_log_cap(ab, "new_pi", &axs->new_pcap.inheritable); + audit_log_cap(ab, "new_pe", &axs->new_pcap.effective); + break; } + } audit_log_end(ab); } @@ -2501,6 +2523,52 @@ int __audit_signal_info(int sig, struct task_struct *t) return 0; } +/** + * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps + * @bprm pointer to the bprm being processed + * @caps the caps read from the disk + * + * Simply check if the proc already has the caps given by the file and if not + * store the priv escalation info for later auditing at the end of the syscall + * + * this can fail and we don't care. See the note in audit.h for + * audit_log_bprm_fcaps() for my explaination.... + * + * -Eric + */ +void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +{ + struct audit_aux_data_bprm_fcaps *ax; + struct audit_context *context = current->audit_context; + struct cpu_vfs_cap_data vcaps; + struct dentry *dentry; + + ax = kmalloc(sizeof(*ax), GFP_KERNEL); + if (!ax) + return; + + ax->d.type = AUDIT_BPRM_FCAPS; + ax->d.next = context->aux; + context->aux = (void *)ax; + + dentry = dget(bprm->file->f_dentry); + get_vfs_caps_from_disk(dentry, &vcaps); + dput(dentry); + + ax->fcap.permitted = vcaps.permitted; + ax->fcap.inheritable = vcaps.inheritable; + ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; + + ax->old_pcap.permitted = *pP; + ax->old_pcap.inheritable = current->cap_inheritable; + ax->old_pcap.effective = *pE; + + ax->new_pcap.permitted = current->cap_permitted; + ax->new_pcap.inheritable = current->cap_inheritable; + ax->new_pcap.effective = current->cap_effective; +} + /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value diff --git a/security/commoncap.c b/security/commoncap.c index d7eff5797b91..d45393380997 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -376,6 +377,9 @@ int cap_bprm_set_security (struct linux_binprm *bprm) void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { + kernel_cap_t pP = current->cap_permitted; + kernel_cap_t pE = current->cap_effective; + if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset(bprm->cap_post_exec_permitted, current->cap_permitted)) { @@ -409,7 +413,24 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) cap_clear(current->cap_effective); } - /* AUD: Audit candidate if current->cap_effective is set */ + /* + * Audit candidate if current->cap_effective is set + * + * We do not bother to audit if 3 things are true: + * 1) cap_effective has all caps + * 2) we are root + * 3) root is supposed to have all caps (SECURE_NOROOT) + * Since this is just a normal root execing a process. + * + * Number 1 above might fail if you don't have a full bset, but I think + * that is interesting information to audit. + */ + if (!cap_isclear(current->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, current->cap_effective) || + (bprm->e_uid != 0) || (current->uid != 0) || + issecure(SECURE_NOROOT)) + audit_log_bprm_fcaps(bprm, &pP, &pE); + } current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); } -- cgit v1.2.3 From e68b75a027bb94066576139ee33676264f867b87 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:22 +1100 Subject: When the capset syscall is used it is not possible for audit to record the actual capbilities being added/removed. This patch adds a new record type which emits the target pid and the eff, inh, and perm cap sets. example output if you audit capset syscalls would be: type=SYSCALL msg=audit(1225743140.465:76): arch=c000003e syscall=126 success=yes exit=0 a0=17f2014 a1=17f201c a2=80000000 a3=7fff2ab7f060 items=0 ppid=2160 pid=2223 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="setcap" exe="/usr/sbin/setcap" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1322] msg=audit(1225743140.465:76): pid=0 cap_pi=ffffffffffffffff cap_pp=ffffffffffffffff cap_pe=ffffffffffffffff Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 10 ++++++++++ kernel/auditsc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/capability.c | 5 +++++ 3 files changed, 63 insertions(+) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8cfb9feb2a05..6fbebac7b1bf 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -100,6 +100,7 @@ #define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_EOE 1320 /* End of multi-record event */ #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ +#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -454,6 +455,7 @@ extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __u extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); +extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -526,6 +528,13 @@ static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t __audit_log_bprm_fcaps(bprm, pP, pE); } +static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +{ + if (unlikely(!audit_dummy_context())) + return __audit_log_capset(pid, eff, inh, perm); + return 0; +} + extern int audit_n_rules; extern int audit_signals; #else @@ -558,6 +567,7 @@ extern int audit_signals; #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) #define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) +#define audit_log_capset(pid, e, i, p) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3229cd4206f5..cef34235b362 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -204,6 +204,12 @@ struct audit_aux_data_bprm_fcaps { struct audit_cap_data new_pcap; }; +struct audit_aux_data_capset { + struct audit_aux_data d; + pid_t pid; + struct audit_cap_data cap; +}; + struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; @@ -1397,6 +1403,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_cap(ab, "new_pe", &axs->new_pcap.effective); break; } + case AUDIT_CAPSET: { + struct audit_aux_data_capset *axs = (void *)aux; + audit_log_format(ab, "pid=%d", axs->pid); + audit_log_cap(ab, "cap_pi", &axs->cap.inheritable); + audit_log_cap(ab, "cap_pp", &axs->cap.permitted); + audit_log_cap(ab, "cap_pe", &axs->cap.effective); + break; } + } audit_log_end(ab); } @@ -2569,6 +2583,40 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->new_pcap.effective = current->cap_effective; } +/** + * __audit_log_capset - store information about the arguments to the capset syscall + * @pid target pid of the capset call + * @eff effective cap set + * @inh inheritible cap set + * @perm permited cap set + * + * Record the aguments userspace sent to sys_capset for later printing by the + * audit system if applicable + */ +int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +{ + struct audit_aux_data_capset *ax; + struct audit_context *context = current->audit_context; + + if (likely(!audit_enabled || !context || context->dummy)) + return 0; + + ax = kmalloc(sizeof(*ax), GFP_KERNEL); + if (!ax) + return -ENOMEM; + + ax->d.type = AUDIT_CAPSET; + ax->d.next = context->aux; + context->aux = (void *)ax; + + ax->pid = pid; + ax->cap.effective = *eff; + ax->cap.inheritable = *eff; + ax->cap.permitted = *perm; + + return 0; +} + /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value diff --git a/kernel/capability.c b/kernel/capability.c index e13a68535ad5..19f9eda89975 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -7,6 +7,7 @@ * 30 May 2002: Cleanup, Robert M. Love */ +#include #include #include #include @@ -468,6 +469,10 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) i++; } + ret = audit_log_capset(pid, &effective, &inheritable, &permitted); + if (ret) + return ret; + if (pid && (pid != task_pid_vnr(current))) ret = do_sys_capset_other_tasks(pid, &effective, &inheritable, &permitted); -- cgit v1.2.3 From 637d32dc720897616e8a1a4f9e9609e29d431800 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 29 Oct 2008 15:42:12 +1100 Subject: Capabilities: BUG when an invalid capability is requested If an invalid (large) capability is requested the capabilities system may panic as it is dereferencing an array of fixed (short) length. Its possible (and actually often happens) that the capability system accidentally stumbled into a valid memory region but it also regularly happens that it hits invalid memory and BUGs. If such an operation does get past cap_capable then the selinux system is sure to have problems as it already does a (simple) validity check and BUG. This is known to happen by the broken and buggy firegl driver. This patch cleanly checks all capable calls and BUG if a call is for an invalid capability. This will likely break the firegl driver for some situations, but it is the right thing to do. Garbage into a security system gets you killed/bugged Signed-off-by: Eric Paris Acked-by: Arjan van de Ven Acked-by: Serge Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- kernel/capability.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/capability.c b/kernel/capability.c index 19f9eda89975..adb262f83de1 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -514,6 +514,11 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) */ int capable(int cap) { + if (unlikely(!cap_valid(cap))) { + printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap); + BUG(); + } + if (has_capability(current, cap)) { current->flags |= PF_SUPERPRIV; return 1; -- cgit v1.2.3 From 934352f214b3251eb0793c1209d346595a661d80 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 10 Nov 2008 20:41:13 +0530 Subject: sched: add hierarchical accounting to cpu accounting controller Impact: improve CPU time accounting of tasks under the cpu accounting controller Add hierarchical accounting to cpu accounting controller and include cpuacct documentation. Currently, while charging the task's cputime to its accounting group, the accounting group hierarchy isn't updated. This patch charges the cputime of a task to its accounting group and all its parent accounting groups. Reported-by: Srivatsa Vaddagiri Signed-off-by: Bharata B Rao Reviewed-by: Paul Menage Acked-by: Balbir Singh Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/controllers/cpuacct.txt | 32 ++++++++++++++++++++++++++++++++ kernel/sched.c | 12 +++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 Documentation/controllers/cpuacct.txt (limited to 'kernel') diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt new file mode 100644 index 000000000000..bb775fbe43d7 --- /dev/null +++ b/Documentation/controllers/cpuacct.txt @@ -0,0 +1,32 @@ +CPU Accounting Controller +------------------------- + +The CPU accounting controller is used to group tasks using cgroups and +account the CPU usage of these groups of tasks. + +The CPU accounting controller supports multi-hierarchy groups. An accounting +group accumulates the CPU usage of all of its child groups and the tasks +directly present in its group. + +Accounting groups can be created by first mounting the cgroup filesystem. + +# mkdir /cgroups +# mount -t cgroup -ocpuacct none /cgroups + +With the above step, the initial or the parent accounting group +becomes visible at /cgroups. At bootup, this group includes all the +tasks in the system. /cgroups/tasks lists the tasks in this cgroup. +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by +this group which is essentially the CPU time obtained by all the tasks +in the system. + +New accounting groups can be created under the parent group /cgroups. + +# cd /cgroups +# mkdir g1 +# echo $$ > g1 + +The above steps create a new group g1 and move the current shell +process (bash) into it. CPU time consumed by this bash and its children +can be obtained from g1/cpuacct.usage and the same is accumulated in +/cgroups/cpuacct.usage also. diff --git a/kernel/sched.c b/kernel/sched.c index 59db86c915f9..ebaf432365f6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9196,11 +9196,12 @@ struct cgroup_subsys cpu_cgroup_subsys = { * (balbir@in.ibm.com). */ -/* track cpu usage of a group of tasks */ +/* track cpu usage of a group of tasks and its child groups */ struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ u64 *cpuusage; + struct cpuacct *parent; }; struct cgroup_subsys cpuacct_subsys; @@ -9234,6 +9235,9 @@ static struct cgroup_subsys_state *cpuacct_create( return ERR_PTR(-ENOMEM); } + if (cgrp->parent) + ca->parent = cgroup_ca(cgrp->parent); + return &ca->css; } @@ -9313,14 +9317,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) static void cpuacct_charge(struct task_struct *tsk, u64 cputime) { struct cpuacct *ca; + int cpu; if (!cpuacct_subsys.active) return; + cpu = task_cpu(tsk); ca = task_ca(tsk); - if (ca) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk)); + for (; ca; ca = ca->parent) { + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); *cpuusage += cputime; } } -- cgit v1.2.3 From 76aac0e9a17742e60d408be1a706e9aaad370891 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:12 +1100 Subject: CRED: Wrap task credential accesses in the core kernel Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: Al Viro Cc: linux-audit@redhat.com Cc: containers@lists.linux-foundation.org Cc: linux-mm@kvack.org Signed-off-by: James Morris --- kernel/acct.c | 7 +++---- kernel/auditsc.c | 6 ++++-- kernel/cgroup.c | 9 +++++---- kernel/futex.c | 8 +++++--- kernel/futex_compat.c | 3 ++- kernel/ptrace.c | 15 +++++++++------ kernel/sched.c | 11 +++++++---- kernel/signal.c | 15 +++++++++------ kernel/sys.c | 16 ++++++++-------- kernel/sysctl.c | 2 +- kernel/timer.c | 8 ++++---- kernel/user_namespace.c | 2 +- mm/mempolicy.c | 7 +++++-- mm/migrate.c | 7 +++++-- mm/shmem.c | 8 ++++---- 15 files changed, 72 insertions(+), 52 deletions(-) (limited to 'kernel') diff --git a/kernel/acct.c b/kernel/acct.c index f6006a60df5d..d57b7cbb98b6 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -530,15 +530,14 @@ static void do_acct_process(struct bsd_acct_struct *acct, do_div(elapsed, AHZ); ac.ac_btime = get_seconds() - elapsed; /* we really need to bite the bullet and change layout */ - ac.ac_uid = current->uid; - ac.ac_gid = current->gid; + current_uid_gid(&ac.ac_uid, &ac.ac_gid); #if ACCT_VERSION==2 ac.ac_ahz = AHZ; #endif #if ACCT_VERSION==1 || ACCT_VERSION==2 /* backward-compatible 16 bit fields */ - ac.ac_uid16 = current->uid; - ac.ac_gid16 = current->gid; + ac.ac_uid16 = ac.ac_uid; + ac.ac_gid16 = ac.ac_gid; #endif #if ACCT_VERSION==3 ac.ac_pid = task_tgid_nr_ns(current, ns); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cef34235b362..9c7e47ae4576 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2628,7 +2628,8 @@ void audit_core_dumps(long signr) { struct audit_buffer *ab; u32 sid; - uid_t auid = audit_get_loginuid(current); + uid_t auid = audit_get_loginuid(current), uid; + gid_t gid; unsigned int sessionid = audit_get_sessionid(current); if (!audit_enabled) @@ -2638,8 +2639,9 @@ void audit_core_dumps(long signr) return; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); + current_uid_gid(&uid, &gid); audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", - auid, current->uid, current->gid, sessionid); + auid, uid, gid, sessionid); security_task_getsecid(current, &sid); if (sid) { char *ctx = NULL; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 35eebd5510c2..78f9b310c4f3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -571,8 +571,8 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) if (inode) { inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info; @@ -1279,6 +1279,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { struct task_struct *tsk; + uid_t euid; int ret; if (pid) { @@ -1291,8 +1292,8 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) get_task_struct(tsk); rcu_read_unlock(); - if ((current->euid) && (current->euid != tsk->uid) - && (current->euid != tsk->suid)) { + euid = current_euid(); + if (euid && euid != tsk->uid && euid != tsk->suid) { put_task_struct(tsk); return -EACCES; } diff --git a/kernel/futex.c b/kernel/futex.c index 8af10027514b..e06962132aaf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -439,10 +439,11 @@ static void free_pi_state(struct futex_pi_state *pi_state) static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; + uid_t euid = current_euid(); rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || ((current->euid != p->euid) && (current->euid != p->uid))) + if (!p || (euid != p->euid && euid != p->uid)) p = ERR_PTR(-ESRCH); else get_task_struct(p); @@ -1829,6 +1830,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, { struct robust_list_head __user *head; unsigned long ret; + uid_t euid = current_euid(); if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1844,8 +1846,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && - !capable(CAP_SYS_PTRACE)) + if (euid != p->euid && euid != p->uid && + !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; rcu_read_unlock(); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 04ac3a9e42cf..3254d4e41e88 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -135,6 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; + uid_t euid = current_euid(); if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -150,7 +151,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && + if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1e68e4c39e2c..937f6b5b2008 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -123,16 +123,19 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ + uid_t uid; + gid_t gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + current_uid_gid(&uid, &gid); + if ((uid != task->euid || + uid != task->suid || + uid != task->uid || + gid != task->egid || + gid != task->sgid || + gid != task->gid) && !capable(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); if (task->mm) diff --git a/kernel/sched.c b/kernel/sched.c index e8819bc6f462..c3b8b1fcde0d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5128,6 +5128,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; + uid_t euid; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -5180,8 +5181,9 @@ recheck: return -EPERM; /* can't change other user's priorities */ - if ((current->euid != p->euid) && - (current->euid != p->uid)) + euid = current_euid(); + if (euid != p->euid && + euid != p->uid) return -EPERM; } @@ -5392,6 +5394,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) cpumask_t cpus_allowed; cpumask_t new_mask = *in_mask; struct task_struct *p; + uid_t euid; int retval; get_online_cpus(); @@ -5412,9 +5415,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); + euid = current_euid(); retval = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && - !capable(CAP_SYS_NICE)) + if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 4530fc654455..167b535fe1a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -567,6 +567,7 @@ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { struct pid *sid; + uid_t uid, euid; int error; if (!valid_signal(sig)) @@ -579,8 +580,10 @@ static int check_kill_permission(int sig, struct siginfo *info, if (error) return error; - if ((current->euid ^ t->suid) && (current->euid ^ t->uid) && - (current->uid ^ t->suid) && (current->uid ^ t->uid) && + uid = current_uid(); + euid = current_euid(); + if ((euid ^ t->suid) && (euid ^ t->uid) && + (uid ^ t->suid) && (uid ^ t->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -844,7 +847,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, q->info.si_errno = 0; q->info.si_code = SI_USER; q->info.si_pid = task_pid_vnr(current); - q->info.si_uid = current->uid; + q->info.si_uid = current_uid(); break; case (unsigned long) SEND_SIG_PRIV: q->info.si_signo = sig; @@ -1598,7 +1601,7 @@ void ptrace_notify(int exit_code) info.si_signo = SIGTRAP; info.si_code = exit_code; info.si_pid = task_pid_vnr(current); - info.si_uid = current->uid; + info.si_uid = current_uid(); /* Let the debugger run. */ spin_lock_irq(¤t->sighand->siglock); @@ -2211,7 +2214,7 @@ sys_kill(pid_t pid, int sig) info.si_errno = 0; info.si_code = SI_USER; info.si_pid = task_tgid_vnr(current); - info.si_uid = current->uid; + info.si_uid = current_uid(); return kill_something_info(sig, &info, pid); } @@ -2228,7 +2231,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) info.si_errno = 0; info.si_code = SI_TKILL; info.si_pid = task_tgid_vnr(current); - info.si_uid = current->uid; + info.si_uid = current_uid(); rcu_read_lock(); p = find_task_by_vpid(pid); diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8f7d16..ed5c29c748ac 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -114,10 +114,10 @@ void (*pm_power_off_prepare)(void); static int set_one_prio(struct task_struct *p, int niceval, int error) { + uid_t euid = current_euid(); int no_nice; - if (p->uid != current->euid && - p->euid != current->euid && !capable(CAP_SYS_NICE)) { + if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } @@ -176,16 +176,16 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) case PRIO_USER: user = current->user; if (!who) - who = current->uid; + who = current_uid(); else - if ((who != current->uid) && !(user = find_user(who))) + if (who != current_uid() && !(user = find_user(who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) if (p->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); - if (who != current->uid) + if (who != current_uid()) free_uid(user); /* For find_user() */ break; } @@ -238,9 +238,9 @@ asmlinkage long sys_getpriority(int which, int who) case PRIO_USER: user = current->user; if (!who) - who = current->uid; + who = current_uid(); else - if ((who != current->uid) && !(user = find_user(who))) + if (who != current_uid() && !(user = find_user(who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -250,7 +250,7 @@ asmlinkage long sys_getpriority(int which, int who) retval = niceval; } while_each_thread(g, p); - if (who != current->uid) + if (who != current_uid()) free_uid(user); /* for find_user() */ break; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d048fa2d902..511031381c33 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1641,7 +1641,7 @@ out: static int test_perm(int mode, int op) { - if (!current->euid) + if (!current_euid()) mode >>= 6; else if (in_egroup_p(0)) mode >>= 3; diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c58..b54e4646cee7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1123,25 +1123,25 @@ asmlinkage long sys_getppid(void) asmlinkage long sys_getuid(void) { /* Only we change this so SMP safe */ - return current->uid; + return current_uid(); } asmlinkage long sys_geteuid(void) { /* Only we change this so SMP safe */ - return current->euid; + return current_euid(); } asmlinkage long sys_getgid(void) { /* Only we change this so SMP safe */ - return current->gid; + return current_gid(); } asmlinkage long sys_getegid(void) { /* Only we change this so SMP safe */ - return current->egid; + return current_egid(); } #endif diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 532858fa5b88..f82730adea00 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -38,7 +38,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) } /* Reset current->user with a new one */ - new_user = alloc_uid(ns, current->uid); + new_user = alloc_uid(ns, current_uid()); if (!new_user) { free_uid(ns->root_user); kfree(ns); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 36f42573a335..07a96474077d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1115,6 +1115,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, nodemask_t old; nodemask_t new; nodemask_t task_nodes; + uid_t uid, euid; int err; err = get_nodes(&old, old_nodes, maxnode); @@ -1144,8 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - if ((current->euid != task->suid) && (current->euid != task->uid) && - (current->uid != task->suid) && (current->uid != task->uid) && + uid = current_uid(); + euid = current_euid(); + if (euid != task->suid && euid != task->uid && + uid != task->suid && uid != task->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 6602941bfab0..6263c24c4afe 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1048,6 +1048,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, struct task_struct *task; struct mm_struct *mm; int err; + uid_t uid, euid; /* Check flags */ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) @@ -1075,8 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - if ((current->euid != task->suid) && (current->euid != task->uid) && - (current->uid != task->suid) && (current->uid != task->uid) && + uid = current_uid(); + euid = current_euid(); + if (euid != task->suid && euid != task->uid && + uid != task->suid && uid != task->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/shmem.c b/mm/shmem.c index 0ed075215e5f..f1b0d4871f3a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1513,8 +1513,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) inode = new_inode(sb); if (inode) { inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -2278,8 +2278,8 @@ static int shmem_fill_super(struct super_block *sb, sbinfo->max_blocks = 0; sbinfo->max_inodes = 0; sbinfo->mode = S_IRWXUGO | S_ISVTX; - sbinfo->uid = current->fsuid; - sbinfo->gid = current->fsgid; + sbinfo->uid = current_fsuid(); + sbinfo->gid = current_fsgid(); sbinfo->mpol = NULL; sb->s_fs_info = sbinfo; -- cgit v1.2.3 From 8bbf4976b59fc9fc2861e79cab7beb3f6d647640 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:14 +1100 Subject: KEYS: Alter use of key instantiation link-to-keyring argument Alter the use of the key instantiation and negation functions' link-to-keyring arguments. Currently this specifies a keyring in the target process to link the key into, creating the keyring if it doesn't exist. This, however, can be a problem for copy-on-write credentials as it means that the instantiating process can alter the credentials of the requesting process. This patch alters the behaviour such that: (1) If keyctl_instantiate_key() or keyctl_negate_key() are given a specific keyring by ID (ringid >= 0), then that keyring will be used. (2) If keyctl_instantiate_key() or keyctl_negate_key() are given one of the special constants that refer to the requesting process's keyrings (KEY_SPEC_*_KEYRING, all <= 0), then: (a) If sys_request_key() was given a keyring to use (destringid) then the key will be attached to that keyring. (b) If sys_request_key() was given a NULL keyring, then the key being instantiated will be attached to the default keyring as set by keyctl_set_reqkey_keyring(). (3) No extra link will be made. Decision point (1) follows current behaviour, and allows those instantiators who've searched for a specifically named keyring in the requestor's keyring so as to partition the keys by type to still have their named keyrings. Decision point (2) allows the requestor to make sure that the key or keys that get produced by request_key() go where they want, whilst allowing the instantiator to request that the key is retained. This is mainly useful for situations where the instantiator makes a secondary request, the key for which should be retained by the initial requestor: +-----------+ +--------------+ +--------------+ | | | | | | | Requestor |------->| Instantiator |------->| Instantiator | | | | | | | +-----------+ +--------------+ +--------------+ request_key() request_key() This might be useful, for example, in Kerberos, where the requestor requests a ticket, and then the ticket instantiator requests the TGT, which someone else then has to go and fetch. The TGT, however, should be retained in the keyrings of the requestor, not the first instantiator. To make this explict an extra special keyring constant is also added. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/key.h | 16 +++--- include/linux/keyctl.h | 4 +- kernel/kmod.c | 2 +- security/keys/internal.h | 12 ++-- security/keys/keyctl.c | 118 +++++++++++++++++++++++---------------- security/keys/process_keys.c | 88 ++++++++++++++++++----------- security/keys/request_key.c | 75 +++++++++++++++++-------- security/keys/request_key_auth.c | 5 +- 8 files changed, 199 insertions(+), 121 deletions(-) (limited to 'kernel') diff --git a/include/linux/key.h b/include/linux/key.h index 1b70e35a71e3..df709e1af3cd 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -287,11 +287,11 @@ extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(tsk, keyring) \ -({ \ - struct key *old_session = tsk->signal->session_keyring; \ - tsk->signal->session_keyring = keyring; \ - old_session; \ +#define __install_session_keyring(keyring) \ +({ \ + struct key *old_session = current->signal->session_keyring; \ + current->signal->session_keyring = keyring; \ + old_session; \ }) #else /* CONFIG_KEYS */ @@ -302,11 +302,11 @@ extern void key_init(void); #define key_revoke(k) do { } while(0) #define key_put(k) do { } while(0) #define key_ref_put(k) do { } while(0) -#define make_key_ref(k, p) ({ NULL; }) -#define key_ref_to_ptr(k) ({ NULL; }) +#define make_key_ref(k, p) NULL +#define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 #define switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(t, k) ({ NULL; }) +#define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 #define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 656ee6b77a4a..c0688eb72093 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -1,6 +1,6 @@ /* keyctl.h: keyctl command IDs * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -20,6 +20,7 @@ #define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */ #define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */ #define KEY_SPEC_REQKEY_AUTH_KEY -7 /* - key ID for assumed request_key auth key */ +#define KEY_SPEC_REQUESTOR_KEYRING -8 /* - key ID for request_key() dest keyring */ /* request-key default keyrings */ #define KEY_REQKEY_DEFL_NO_CHANGE -1 @@ -30,6 +31,7 @@ #define KEY_REQKEY_DEFL_USER_KEYRING 4 #define KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 #define KEY_REQKEY_DEFL_GROUP_KEYRING 6 +#define KEY_REQKEY_DEFL_REQUESTOR_KEYRING 7 /* keyctl commands */ #define KEYCTL_GET_KEYRING_ID 0 /* ask for a keyring's ID */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 3d3c3ea3a023..f044f8f57703 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -140,7 +140,7 @@ static int ____call_usermodehelper(void *data) /* Unblock all signals and set the session keyring. */ new_session = key_get(sub_info->ring); spin_lock_irq(¤t->sighand->siglock); - old_session = __install_session_keyring(current, new_session); + old_session = __install_session_keyring(new_session); flush_signal_handlers(current, 1); sigemptyset(¤t->blocked); recalc_sigpending(); diff --git a/security/keys/internal.h b/security/keys/internal.h index a60c68138b4d..d1586c629788 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -109,8 +109,9 @@ extern key_ref_t search_process_keyrings(struct key_type *type, extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); -extern int install_thread_keyring(struct task_struct *tsk); -extern int install_process_keyring(struct task_struct *tsk); +extern int install_user_keyrings(void); +extern int install_thread_keyring(void); +extern int install_process_keyring(void); extern struct key *request_key_and_link(struct key_type *type, const char *description, @@ -120,8 +121,7 @@ extern struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags); -extern key_ref_t lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, +extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key_perm_t perm); extern long join_session_keyring(const char *name); @@ -152,6 +152,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) */ struct request_key_auth { struct key *target_key; + struct key *dest_keyring; struct task_struct *context; void *callout_info; size_t callout_len; @@ -161,7 +162,8 @@ struct request_key_auth { extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, const void *callout_info, - size_t callout_len); + size_t callout_len, + struct key *dest_keyring); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3f09e5b2a784..fcce331eca72 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -103,7 +103,7 @@ asmlinkage long sys_add_key(const char __user *_type, } /* find the target keyring (which must be writable) */ - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error3; @@ -185,7 +185,7 @@ asmlinkage long sys_request_key(const char __user *_type, /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { - dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; @@ -235,7 +235,7 @@ long keyctl_get_keyring_ID(key_serial_t id, int create) key_ref_t key_ref; long ret; - key_ref = lookup_user_key(NULL, id, create, 0, KEY_SEARCH); + key_ref = lookup_user_key(id, create, 0, KEY_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -308,7 +308,7 @@ long keyctl_update_key(key_serial_t id, } /* find the target key (which must be writable) */ - key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + key_ref = lookup_user_key(id, 0, 0, KEY_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -336,7 +336,7 @@ long keyctl_revoke_key(key_serial_t id) key_ref_t key_ref; long ret; - key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + key_ref = lookup_user_key(id, 0, 0, KEY_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -362,7 +362,7 @@ long keyctl_keyring_clear(key_serial_t ringid) key_ref_t keyring_ref; long ret; - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; @@ -388,13 +388,13 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) key_ref_t keyring_ref, key_ref; long ret; - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } - key_ref = lookup_user_key(NULL, id, 1, 0, KEY_LINK); + key_ref = lookup_user_key(id, 1, 0, KEY_LINK); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -422,13 +422,13 @@ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) key_ref_t keyring_ref, key_ref; long ret; - keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } - key_ref = lookup_user_key(NULL, id, 0, 0, 0); + key_ref = lookup_user_key(id, 0, 0, 0); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -464,7 +464,7 @@ long keyctl_describe_key(key_serial_t keyid, char *tmpbuf; long ret; - key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW); if (IS_ERR(key_ref)) { /* viewing a key under construction is permitted if we have the * authorisation token handy */ @@ -472,7 +472,7 @@ long keyctl_describe_key(key_serial_t keyid, instkey = key_get_instantiation_authkey(keyid); if (!IS_ERR(instkey)) { key_put(instkey); - key_ref = lookup_user_key(NULL, keyid, + key_ref = lookup_user_key(keyid, 0, 1, 0); if (!IS_ERR(key_ref)) goto okay; @@ -557,7 +557,7 @@ long keyctl_keyring_search(key_serial_t ringid, } /* get the keyring at which to begin the search */ - keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH); + keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error2; @@ -566,7 +566,7 @@ long keyctl_keyring_search(key_serial_t ringid, /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { - dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; @@ -636,7 +636,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) long ret; /* find the key first */ - key_ref = lookup_user_key(NULL, keyid, 0, 0, 0); + key_ref = lookup_user_key(keyid, 0, 0, 0); if (IS_ERR(key_ref)) { ret = -ENOKEY; goto error; @@ -699,7 +699,7 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) if (uid == (uid_t) -1 && gid == (gid_t) -1) goto error; - key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -804,7 +804,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm) if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) goto error; - key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -829,6 +829,43 @@ error: } /* end keyctl_setperm_key() */ +/* + * get the destination keyring for instantiation + */ +static long get_instantiation_keyring(key_serial_t ringid, + struct request_key_auth *rka, + struct key **_dest_keyring) +{ + key_ref_t dkref; + + /* just return a NULL pointer if we weren't asked to make a link */ + if (ringid == 0) { + *_dest_keyring = NULL; + return 0; + } + + /* if a specific keyring is nominated by ID, then use that */ + if (ringid > 0) { + dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE); + if (IS_ERR(dkref)) + return PTR_ERR(dkref); + *_dest_keyring = key_ref_to_ptr(dkref); + return 0; + } + + if (ringid == KEY_SPEC_REQKEY_AUTH_KEY) + return -EINVAL; + + /* otherwise specify the destination keyring recorded in the + * authorisation key (any KEY_SPEC_*_KEYRING) */ + if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) { + *_dest_keyring = rka->dest_keyring; + return 0; + } + + return -ENOKEY; +} + /*****************************************************************************/ /* * instantiate the key with the specified payload, and, if one is given, link @@ -840,8 +877,7 @@ long keyctl_instantiate_key(key_serial_t id, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey; - key_ref_t keyring_ref; + struct key *instkey, *dest_keyring; void *payload; long ret; bool vm = false; @@ -883,21 +919,15 @@ long keyctl_instantiate_key(key_serial_t id, /* find the destination keyring amongst those belonging to the * requesting task */ - keyring_ref = NULL; - if (ringid) { - keyring_ref = lookup_user_key(rka->context, ringid, 1, 0, - KEY_WRITE); - if (IS_ERR(keyring_ref)) { - ret = PTR_ERR(keyring_ref); - goto error2; - } - } + ret = get_instantiation_keyring(ringid, rka, &dest_keyring); + if (ret < 0) + goto error2; /* instantiate the key and link it into a keyring */ ret = key_instantiate_and_link(rka->target_key, payload, plen, - key_ref_to_ptr(keyring_ref), instkey); + dest_keyring, instkey); - key_ref_put(keyring_ref); + key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ @@ -924,8 +954,7 @@ error: long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey; - key_ref_t keyring_ref; + struct key *instkey, *dest_keyring; long ret; /* the appropriate instantiation authorisation key must have been @@ -941,20 +970,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* find the destination keyring if present (which must also be * writable) */ - keyring_ref = NULL; - if (ringid) { - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring_ref)) { - ret = PTR_ERR(keyring_ref); - goto error; - } - } + ret = get_instantiation_keyring(ringid, rka, &dest_keyring); + if (ret < 0) + goto error; /* instantiate the key and link it into a keyring */ ret = key_negate_and_link(rka->target_key, timeout, - key_ref_to_ptr(keyring_ref), instkey); + dest_keyring, instkey); - key_ref_put(keyring_ref); + key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ @@ -979,13 +1003,13 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: - ret = install_thread_keyring(current); + ret = install_thread_keyring(); if (ret < 0) return ret; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - ret = install_process_keyring(current); + ret = install_process_keyring(); if (ret < 0) return ret; @@ -1018,7 +1042,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout) time_t expiry; long ret; - key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -1105,7 +1129,7 @@ long keyctl_get_security(key_serial_t keyid, char *context; long ret; - key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW); if (IS_ERR(key_ref)) { if (PTR_ERR(key_ref) != -EACCES) return PTR_ERR(key_ref); @@ -1117,7 +1141,7 @@ long keyctl_get_security(key_serial_t keyid, return PTR_ERR(key_ref); key_put(instkey); - key_ref = lookup_user_key(NULL, keyid, 0, 1, 0); + key_ref = lookup_user_key(keyid, 0, 1, 0); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); } diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 5be6d018759a..1c793b7090a7 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -40,9 +40,9 @@ struct key_user root_key_user = { /* * install user and user session keyrings for a particular UID */ -static int install_user_keyrings(struct task_struct *tsk) +int install_user_keyrings(void) { - struct user_struct *user = tsk->user; + struct user_struct *user = current->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; @@ -67,7 +67,7 @@ static int install_user_keyrings(struct task_struct *tsk) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - tsk, KEY_ALLOC_IN_QUOTA, + current, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); @@ -83,7 +83,8 @@ static int install_user_keyrings(struct task_struct *tsk) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - tsk, KEY_ALLOC_IN_QUOTA, NULL); + current, KEY_ALLOC_IN_QUOTA, + NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -146,8 +147,9 @@ void switch_uid_keyring(struct user_struct *new_user) /* * install a fresh thread keyring, discarding the old one */ -int install_thread_keyring(struct task_struct *tsk) +int install_thread_keyring(void) { + struct task_struct *tsk = current; struct key *keyring, *old; char buf[20]; int ret; @@ -178,8 +180,9 @@ error: /* * make sure a process keyring is installed */ -int install_process_keyring(struct task_struct *tsk) +int install_process_keyring(void) { + struct task_struct *tsk = current; struct key *keyring; char buf[20]; int ret; @@ -218,9 +221,9 @@ error: * install a session keyring, discarding the old one * - if a keyring is not supplied, an empty one is invented */ -static int install_session_keyring(struct task_struct *tsk, - struct key *keyring) +static int install_session_keyring(struct key *keyring) { + struct task_struct *tsk = current; unsigned long flags; struct key *old; char buf[20]; @@ -572,93 +575,91 @@ static int lookup_user_key_possessed(const struct key *key, const void *target) * - don't create special keyrings unless so requested * - partially constructed keys aren't found unless requested */ -key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, - int create, int partial, key_perm_t perm) +key_ref_t lookup_user_key(key_serial_t id, int create, int partial, + key_perm_t perm) { + struct request_key_auth *rka; + struct task_struct *t = current; key_ref_t key_ref, skey_ref; struct key *key; int ret; - if (!context) - context = current; - key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!context->thread_keyring) { + if (!t->thread_keyring) { if (!create) goto error; - ret = install_thread_keyring(context); + ret = install_thread_keyring(); if (ret < 0) { key = ERR_PTR(ret); goto error; } } - key = context->thread_keyring; + key = t->thread_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: - if (!context->signal->process_keyring) { + if (!t->signal->process_keyring) { if (!create) goto error; - ret = install_process_keyring(context); + ret = install_process_keyring(); if (ret < 0) { key = ERR_PTR(ret); goto error; } } - key = context->signal->process_keyring; + key = t->signal->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!context->signal->session_keyring) { + if (!t->signal->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ - ret = install_user_keyrings(context); + ret = install_user_keyrings(); if (ret < 0) goto error; - ret = install_session_keyring( - context, context->user->session_keyring); + ret = install_session_keyring(t->user->session_keyring); if (ret < 0) goto error; } rcu_read_lock(); - key = rcu_dereference(context->signal->session_keyring); + key = rcu_dereference(t->signal->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: - if (!context->user->uid_keyring) { - ret = install_user_keyrings(context); + if (!t->user->uid_keyring) { + ret = install_user_keyrings(); if (ret < 0) goto error; } - key = context->user->uid_keyring; + key = t->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!context->user->session_keyring) { - ret = install_user_keyrings(context); + if (!t->user->session_keyring) { + ret = install_user_keyrings(); if (ret < 0) goto error; } - key = context->user->session_keyring; + key = t->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -669,7 +670,7 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = context->request_key_auth; + key = t->request_key_auth; if (!key) goto error; @@ -677,6 +678,25 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, key_ref = make_key_ref(key, 1); break; + case KEY_SPEC_REQUESTOR_KEYRING: + if (!t->request_key_auth) + goto error; + + down_read(&t->request_key_auth->sem); + if (t->request_key_auth->flags & KEY_FLAG_REVOKED) { + key_ref = ERR_PTR(-EKEYREVOKED); + key = NULL; + } else { + rka = t->request_key_auth->payload.data; + key = rka->dest_keyring; + atomic_inc(&key->usage); + } + up_read(&t->request_key_auth->sem); + if (!key) + goto error; + key_ref = make_key_ref(key, 1); + break; + default: key_ref = ERR_PTR(-EINVAL); if (id < 1) @@ -725,7 +745,7 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, context, perm); + ret = key_task_permission(key_ref, t, perm); if (ret < 0) goto invalid_key; @@ -754,7 +774,7 @@ long join_session_keyring(const char *name) /* if no name is provided, install an anonymous keyring */ if (!name) { - ret = install_session_keyring(tsk, NULL); + ret = install_session_keyring(NULL); if (ret < 0) goto error; @@ -784,7 +804,7 @@ long join_session_keyring(const char *name) } /* we've got a keyring - now to install it */ - ret = install_session_keyring(tsk, keyring); + ret = install_session_keyring(keyring); if (ret < 0) goto error2; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 91953c814497..8e9d93b4a402 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -76,6 +76,10 @@ static int call_sbin_request_key(struct key_construction *cons, kenter("{%d},{%d},%s", key->serial, authkey->serial, op); + ret = install_user_keyrings(); + if (ret < 0) + goto error_alloc; + /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); @@ -165,7 +169,8 @@ error_alloc: * - we ignore program failure and go on key status instead */ static int construct_key(struct key *key, const void *callout_info, - size_t callout_len, void *aux) + size_t callout_len, void *aux, + struct key *dest_keyring) { struct key_construction *cons; request_key_actor_t actor; @@ -179,7 +184,8 @@ static int construct_key(struct key *key, const void *callout_info, return -ENOMEM; /* allocate an authorisation key */ - authkey = request_key_auth_new(key, callout_info, callout_len); + authkey = request_key_auth_new(key, callout_info, callout_len, + dest_keyring); if (IS_ERR(authkey)) { kfree(cons); ret = PTR_ERR(authkey); @@ -207,27 +213,48 @@ static int construct_key(struct key *key, const void *callout_info, } /* - * link a key to the appropriate destination keyring - * - the caller must hold a write lock on the destination keyring + * get the appropriate destination keyring for the request + * - we return whatever keyring we select with an extra reference upon it which + * the caller must release */ -static void construct_key_make_link(struct key *key, struct key *dest_keyring) +static void construct_get_dest_keyring(struct key **_dest_keyring) { + struct request_key_auth *rka; struct task_struct *tsk = current; - struct key *drop = NULL; + struct key *dest_keyring = *_dest_keyring, *authkey; - kenter("{%d},%p", key->serial, dest_keyring); + kenter("%p", dest_keyring); /* find the appropriate keyring */ - if (!dest_keyring) { + if (dest_keyring) { + /* the caller supplied one */ + key_get(dest_keyring); + } else { + /* use a default keyring; falling through the cases until we + * find one that we actually have */ switch (tsk->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: + case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: + if (tsk->request_key_auth) { + authkey = tsk->request_key_auth; + down_read(&authkey->sem); + rka = authkey->payload.data; + if (!test_bit(KEY_FLAG_REVOKED, + &authkey->flags)) + dest_keyring = + key_get(rka->dest_keyring); + up_read(&authkey->sem); + if (dest_keyring) + break; + } + case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = tsk->thread_keyring; + dest_keyring = key_get(tsk->thread_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = tsk->signal->process_keyring; + dest_keyring = key_get(tsk->signal->process_keyring); if (dest_keyring) break; @@ -236,17 +263,16 @@ static void construct_key_make_link(struct key *key, struct key *dest_keyring) dest_keyring = key_get( rcu_dereference(tsk->signal->session_keyring)); rcu_read_unlock(); - drop = dest_keyring; if (dest_keyring) break; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = tsk->user->session_keyring; + dest_keyring = key_get(tsk->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = tsk->user->uid_keyring; + dest_keyring = key_get(tsk->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: @@ -255,10 +281,9 @@ static void construct_key_make_link(struct key *key, struct key *dest_keyring) } } - /* and attach the key to it */ - __key_link(dest_keyring, key); - key_put(drop); - kleave(""); + *_dest_keyring = dest_keyring; + kleave(" [dk %d]", key_serial(dest_keyring)); + return; } /* @@ -288,8 +313,7 @@ static int construct_alloc_key(struct key_type *type, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); - if (dest_keyring) - down_write(&dest_keyring->sem); + down_write(&dest_keyring->sem); /* attach the key to the destination keyring under lock, but we do need * to do another check just in case someone beat us to it whilst we @@ -301,12 +325,10 @@ static int construct_alloc_key(struct key_type *type, if (!IS_ERR(key_ref)) goto key_already_present; - if (dest_keyring) - construct_key_make_link(key, dest_keyring); + __key_link(dest_keyring, key); mutex_unlock(&key_construction_mutex); - if (dest_keyring) - up_write(&dest_keyring->sem); + up_write(&dest_keyring->sem); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -348,21 +370,26 @@ static struct key *construct_key_and_link(struct key_type *type, if (!user) return ERR_PTR(-ENOMEM); + construct_get_dest_keyring(&dest_keyring); + ret = construct_alloc_key(type, description, dest_keyring, flags, user, &key); key_user_put(user); if (ret == 0) { - ret = construct_key(key, callout_info, callout_len, aux); + ret = construct_key(key, callout_info, callout_len, aux, + dest_keyring); if (ret < 0) goto construction_failed; } + key_put(dest_keyring); return key; construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); + key_put(dest_keyring); return ERR_PTR(ret); } diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 729156b3485e..1762d44711d5 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -128,6 +128,7 @@ static void request_key_auth_destroy(struct key *key) } key_put(rka->target_key); + key_put(rka->dest_keyring); kfree(rka->callout_info); kfree(rka); @@ -139,7 +140,7 @@ static void request_key_auth_destroy(struct key *key) * access to the caller's security data */ struct key *request_key_auth_new(struct key *target, const void *callout_info, - size_t callout_len) + size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; struct key *authkey = NULL; @@ -188,6 +189,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, } rka->target_key = key_get(target); + rka->dest_keyring = key_get(dest_keyring); memcpy(rka->callout_info, callout_info, callout_len); rka->callout_len = callout_len; @@ -223,6 +225,7 @@ error_inst: key_put(authkey); error_alloc: key_put(rka->target_key); + key_put(rka->dest_keyring); kfree(rka->callout_info); kfree(rka); kleave("= %d", ret); -- cgit v1.2.3 From 1cdcbec1a3372c0c49c59d292e708fd07b509f18 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:14 +1100 Subject: CRED: Neuter sys_capset() Take away the ability for sys_capset() to affect processes other than current. This means that current will not need to lock its own credentials when reading them against interference by other processes. This has effectively been the case for a while anyway, since: (1) Without LSM enabled, sys_capset() is disallowed. (2) With file-based capabilities, sys_capset() is neutered. Signed-off-by: David Howells Acked-by: Serge Hallyn Acked-by: Andrew G. Morgan Acked-by: James Morris Signed-off-by: James Morris --- fs/open.c | 12 +-- include/linux/security.h | 48 ++++------ kernel/capability.c | 227 +++++------------------------------------------ security/commoncap.c | 29 ++---- security/security.c | 18 ++-- security/selinux/hooks.c | 10 +-- 6 files changed, 61 insertions(+), 283 deletions(-) (limited to 'kernel') diff --git a/fs/open.c b/fs/open.c index 83cdb9dee0c1..500cc0c54762 100644 --- a/fs/open.c +++ b/fs/open.c @@ -441,17 +441,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) current->fsgid = current->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { - /* - * Clear the capabilities if we switch to a non-root user - */ -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - /* - * FIXME: There is a race here against sys_capset. The - * capabilities can change yet we will restore the old - * value below. We should hold task_capabilities_lock, - * but we cannot because user_path_at can sleep. - */ -#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ + /* Clear the capabilities if we switch to a non-root user */ if (current->uid) old_cap = cap_set_effective(__cap_empty_set); else diff --git a/include/linux/security.h b/include/linux/security.h index 5fe28a671cd3..d1ce8beddbd7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,8 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); @@ -1191,24 +1191,14 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if the capability sets were successfully obtained. * @capset_check: * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the @target process. - * Caveat: @target is also set to current if a set of processes is - * specified (i.e. all processes other than current and init or a - * particular process group). Hence, the capset_set hook may need to - * revalidate permission to the actual target process. - * @target contains the task_struct structure for target process. + * @permitted capability sets for the current process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if permission is granted. * @capset_set: * Set the @effective, @inheritable, and @permitted capability sets for - * the @target process. Since capset_check cannot always check permission - * to the real @target process, this hook may also perform permission - * checking to determine if the current process is allowed to set the - * capability sets of the @target process. However, this hook has no way - * of returning an error due to the structure of the sys_capset code. - * @target contains the task_struct structure for target process. + * the current process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. @@ -1303,12 +1293,10 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (struct task_struct *target, - kernel_cap_t *effective, + int (*capset_check) (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - void (*capset_set) (struct task_struct *target, - kernel_cap_t *effective, + void (*capset_set) (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); @@ -1572,12 +1560,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, +int security_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, +void security_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); @@ -1769,20 +1755,18 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline int security_capset_check(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - return cap_capset_check(target, effective, inheritable, permitted); + return cap_capset_check(effective, inheritable, permitted); } -static inline void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline void security_capset_set(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - cap_capset_set(target, effective, inheritable, permitted); + cap_capset_set(effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) diff --git a/kernel/capability.c b/kernel/capability.c index adb262f83de1..58b00519624a 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -127,160 +127,6 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) return 0; } -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - -/* - * Without filesystem capability support, we nominally support one process - * setting the capabilities of another - */ -static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, - kernel_cap_t *pIp, kernel_cap_t *pPp) -{ - struct task_struct *target; - int ret; - - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - if (pid && pid != task_pid_vnr(current)) { - target = find_task_by_vpid(pid); - if (!target) { - ret = -ESRCH; - goto out; - } - } else - target = current; - - ret = security_capget(target, pEp, pIp, pPp); - -out: - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - return ret; -} - -/* - * cap_set_pg - set capabilities for all processes in a given process - * group. We call this holding task_capability_lock and tasklist_lock. - */ -static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - struct task_struct *g, *target; - int ret = -EPERM; - int found = 0; - struct pid *pgrp; - - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - pgrp = find_vpid(pgrp_nr); - do_each_pid_task(pgrp, PIDTYPE_PGID, g) { - target = g; - while_each_thread(g, target) { - if (!security_capset_check(target, effective, - inheritable, permitted)) { - security_capset_set(target, effective, - inheritable, permitted); - ret = 0; - } - found = 1; - } - } while_each_pid_task(pgrp, PIDTYPE_PGID, g); - - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - if (!found) - ret = 0; - return ret; -} - -/* - * cap_set_all - set capabilities for all processes other than init - * and self. We call this holding task_capability_lock and tasklist_lock. - */ -static inline int cap_set_all(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - struct task_struct *g, *target; - int ret = -EPERM; - int found = 0; - - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - do_each_thread(g, target) { - if (target == current - || is_container_init(target->group_leader)) - continue; - found = 1; - if (security_capset_check(target, effective, inheritable, - permitted)) - continue; - ret = 0; - security_capset_set(target, effective, inheritable, permitted); - } while_each_thread(g, target); - - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - if (!found) - ret = 0; - - return ret; -} - -/* - * Given the target pid does not refer to the current process we - * need more elaborate support... (This support is not present when - * filesystem capabilities are configured.) - */ -static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - struct task_struct *target; - int ret; - - if (!capable(CAP_SETPCAP)) - return -EPERM; - - if (pid == -1) /* all procs other than current and init */ - return cap_set_all(effective, inheritable, permitted); - - else if (pid < 0) /* all procs in process group */ - return cap_set_pg(-pid, effective, inheritable, permitted); - - /* target != current */ - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - target = find_task_by_vpid(pid); - if (!target) - ret = -ESRCH; - else { - ret = security_capset_check(target, effective, inheritable, - permitted); - - /* having verified that the proposed changes are legal, - we now put them into effect. */ - if (!ret) - security_capset_set(target, effective, inheritable, - permitted); - } - - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - return ret; -} - -#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */ - /* * If we have configured with filesystem capability support, then the * only thing that can change the capabilities of the current process @@ -314,22 +160,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, return ret; } -/* - * With filesystem capability support configured, the kernel does not - * permit the changing of capabilities in one process by another - * process. (CAP_SETPCAP has much less broad semantics when configured - * this way.) - */ -static inline int do_sys_capset_other_tasks(pid_t pid, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - return -EPERM; -} - -#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ - /* * Atomically modify the effective capabilities returning the original * value. No permission check is performed here - it is assumed that the @@ -424,16 +254,14 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) * @data: pointer to struct that contains the effective, permitted, * and inheritable capabilities * - * Set capabilities for a given process, all processes, or all - * processes in a given process group. + * Set capabilities for the current process only. The ability to any other + * process(es) has been deprecated and removed. * * The restrictions on setting capabilities are specified as: * - * [pid is for the 'target' task. 'current' is the calling task.] - * - * I: any raised capabilities must be a subset of the (old current) permitted - * P: any raised capabilities must be a subset of the (old current) permitted - * E: must be set to a subset of (new target) permitted + * I: any raised capabilities must be a subset of the old permitted + * P: any raised capabilities must be a subset of the old permitted + * E: must be set to a subset of new permitted * * Returns 0 on success and < 0 on error. */ @@ -452,10 +280,13 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (get_user(pid, &header->pid)) return -EFAULT; + /* may only affect current now */ + if (pid != 0 && pid != task_pid_vnr(current)) + return -EPERM; + if (copy_from_user(&kdata, data, tocopy - * sizeof(struct __user_cap_data_struct))) { + * sizeof(struct __user_cap_data_struct))) return -EFAULT; - } for (i = 0; i < tocopy; i++) { effective.cap[i] = kdata[i].effective; @@ -473,32 +304,20 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (ret) return ret; - if (pid && (pid != task_pid_vnr(current))) - ret = do_sys_capset_other_tasks(pid, &effective, &inheritable, - &permitted); - else { - /* - * This lock is required even when filesystem - * capability support is configured - it protects the - * sys_capget() call from returning incorrect data in - * the case that the targeted process is not the - * current one. - */ - spin_lock(&task_capability_lock); - - ret = security_capset_check(current, &effective, &inheritable, - &permitted); - /* - * Having verified that the proposed changes are - * legal, we now put them into effect. - */ - if (!ret) - security_capset_set(current, &effective, &inheritable, - &permitted); - spin_unlock(&task_capability_lock); - } - + /* This lock is required even when filesystem capability support is + * configured - it protects the sys_capget() call from returning + * incorrect data in the case that the targeted process is not the + * current one. + */ + spin_lock(&task_capability_lock); + ret = security_capset_check(&effective, &inheritable, &permitted); + /* Having verified that the proposed changes are legal, we now put them + * into effect. + */ + if (!ret) + security_capset_set(&effective, &inheritable, &permitted); + spin_unlock(&task_capability_lock); return ret; } diff --git a/security/commoncap.c b/security/commoncap.c index 8283271f0768..e3f36ef629fa 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -96,15 +96,6 @@ int cap_capget (struct task_struct *target, kernel_cap_t *effective, #ifdef CONFIG_SECURITY_FILE_CAPABILITIES -static inline int cap_block_setpcap(struct task_struct *target) -{ - /* - * No support for remote process capability manipulation with - * filesystem capability support. - */ - return (target != current); -} - static inline int cap_inh_is_capped(void) { /* @@ -119,7 +110,6 @@ static inline int cap_limit_ptraced_target(void) { return 1; } #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ -static inline int cap_block_setpcap(struct task_struct *t) { return 0; } static inline int cap_inh_is_capped(void) { return 1; } static inline int cap_limit_ptraced_target(void) { @@ -128,21 +118,18 @@ static inline int cap_limit_ptraced_target(void) #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ -int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, +int cap_capset_check (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - if (cap_block_setpcap(target)) { - return -EPERM; - } if (cap_inh_is_capped() && !cap_issubset(*inheritable, - cap_combine(target->cap_inheritable, + cap_combine(current->cap_inheritable, current->cap_permitted))) { /* incapable of using this inheritable set */ return -EPERM; } if (!cap_issubset(*inheritable, - cap_combine(target->cap_inheritable, + cap_combine(current->cap_inheritable, current->cap_bset))) { /* no new pI capabilities outside bounding set */ return -EPERM; @@ -150,7 +137,7 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, - cap_combine (target->cap_permitted, + cap_combine (current->cap_permitted, current->cap_permitted))) { return -EPERM; } @@ -163,12 +150,12 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, return 0; } -void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, +void cap_capset_set (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - target->cap_effective = *effective; - target->cap_inheritable = *inheritable; - target->cap_permitted = *permitted; + current->cap_effective = *effective; + current->cap_inheritable = *inheritable; + current->cap_permitted = *permitted; } static inline void bprm_clear_caps(struct linux_binprm *bprm) diff --git a/security/security.c b/security/security.c index 346f21e0ec2c..dca37381e2a7 100644 --- a/security/security.c +++ b/security/security.c @@ -145,20 +145,18 @@ int security_capget(struct task_struct *target, return security_ops->capget(target, effective, inheritable, permitted); } -int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +int security_capset_check(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - return security_ops->capset_check(target, effective, inheritable, permitted); + return security_ops->capset_check(effective, inheritable, permitted); } -void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +void security_capset_set(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - security_ops->capset_set(target, effective, inheritable, permitted); + security_ops->capset_set(effective, inheritable, permitted); } int security_capable(struct task_struct *tsk, int cap) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 378dc53c08e8..df9986940e9c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1790,22 +1790,22 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, return secondary_ops->capget(target, effective, inheritable, permitted); } -static int selinux_capset_check(struct task_struct *target, kernel_cap_t *effective, +static int selinux_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { int error; - error = secondary_ops->capset_check(target, effective, inheritable, permitted); + error = secondary_ops->capset_check(effective, inheritable, permitted); if (error) return error; - return task_has_perm(current, target, PROCESS__SETCAP); + return task_has_perm(current, current, PROCESS__SETCAP); } -static void selinux_capset_set(struct task_struct *target, kernel_cap_t *effective, +static void selinux_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - secondary_ops->capset_set(target, effective, inheritable, permitted); + secondary_ops->capset_set(effective, inheritable, permitted); } static int selinux_capable(struct task_struct *tsk, int cap, int audit) -- cgit v1.2.3 From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:16 +1100 Subject: CRED: Separate task security context from task_struct Separate the task security context from task_struct. At this point, the security data is temporarily embedded in the task_struct with two pointers pointing to it. Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in entry.S via asm-offsets. With comment fixes Signed-off-by: Marc Dionne Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/alpha/kernel/asm-offsets.c | 11 +- arch/alpha/kernel/entry.S | 10 +- arch/ia64/ia32/sys_ia32.c | 8 +- arch/mips/kernel/kspd.c | 4 +- arch/s390/kernel/compat_linux.c | 28 ++--- drivers/connector/cn_proc.c | 8 +- fs/binfmt_elf.c | 12 +- fs/binfmt_elf_fdpic.c | 12 +- fs/exec.c | 4 +- fs/fcntl.c | 4 +- fs/file_table.c | 4 +- fs/fuse/dir.c | 12 +- fs/hugetlbfs/inode.c | 4 +- fs/ioprio.c | 12 +- fs/nfsd/auth.c | 22 ++-- fs/nfsd/nfs4recover.c | 12 +- fs/nfsd/nfsfh.c | 6 +- fs/open.c | 17 +-- fs/proc/array.c | 18 +-- fs/proc/base.c | 16 +-- fs/xfs/linux-2.6/xfs_cred.h | 6 +- fs/xfs/linux-2.6/xfs_globals.h | 2 +- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_vnodeops.h | 10 +- include/linux/cred.h | 155 +++++++++++++++++++---- include/linux/init_task.h | 24 ++-- include/linux/sched.h | 52 +------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/auditsc.c | 52 ++++---- kernel/capability.c | 4 +- kernel/cgroup.c | 4 +- kernel/exit.c | 10 +- kernel/fork.c | 24 ++-- kernel/futex.c | 6 +- kernel/futex_compat.c | 5 +- kernel/ptrace.c | 19 +-- kernel/sched.c | 10 +- kernel/signal.c | 16 +-- kernel/sys.c | 266 ++++++++++++++++++++++----------------- kernel/trace/trace.c | 2 +- kernel/tsacct.c | 4 +- kernel/uid16.c | 28 ++--- kernel/user.c | 4 +- mm/mempolicy.c | 10 +- mm/migrate.c | 10 +- mm/oom_kill.c | 2 +- net/core/scm.c | 10 +- net/sunrpc/auth.c | 2 +- security/commoncap.c | 161 +++++++++++++----------- security/keys/keyctl.c | 25 ++-- security/keys/permission.c | 11 +- security/keys/process_keys.c | 98 ++++++++------- security/keys/request_key.c | 18 +-- security/keys/request_key_auth.c | 12 +- security/selinux/exports.c | 2 +- security/selinux/hooks.c | 116 ++++++++--------- security/selinux/selinuxfs.c | 2 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 4 +- security/smack/smack_lsm.c | 77 ++++++------ security/smack/smackfs.c | 6 +- 63 files changed, 832 insertions(+), 677 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 4b18cd94d59d..6ff8886e7e22 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -19,15 +19,18 @@ void foo(void) BLANK(); DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); - DEFINE(TASK_UID, offsetof(struct task_struct, uid)); - DEFINE(TASK_EUID, offsetof(struct task_struct, euid)); - DEFINE(TASK_GID, offsetof(struct task_struct, gid)); - DEFINE(TASK_EGID, offsetof(struct task_struct, egid)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); BLANK(); + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 5fc61e281ac7..f77345bc66a9 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -850,8 +850,9 @@ osf_getpriority: sys_getxuid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_UID($2) - ldl $1, TASK_EUID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_UID($3) + ldl $1, CRED_EUID($3) stq $1, 80($sp) ret .end sys_getxuid @@ -862,8 +863,9 @@ sys_getxuid: sys_getxgid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_GID($2) - ldl $1, TASK_EGID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_GID($3) + ldl $1, CRED_EGID($3) stq $1, 80($sp) ret .end sys_getxgid diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 5e92ae00bdbb..2445a9d3488e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1772,20 +1772,20 @@ sys32_getgroups16 (int gidsetsize, short __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index b0591ae0ce56..fd6e51224034 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -174,8 +174,8 @@ static unsigned int translate_open_flags(int flags) static void sp_setfsuidgid( uid_t uid, gid_t gid) { - current->fsuid = uid; - current->fsgid = gid; + current->cred->fsuid = uid; + current->cred->fsgid = gid; key_fsuid_changed(current); key_fsgid_changed(current); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 4646382af34f..6cc87d8c8682 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -148,9 +148,9 @@ asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -165,9 +165,9 @@ asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -217,20 +217,20 @@ asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -261,22 +261,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) asmlinkage long sys32_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys32_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys32_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys32_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } /* diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 5c9f67f98d10..354c1ff17159 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -116,11 +116,11 @@ void proc_id_connector(struct task_struct *task, int which_id) ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->uid; - ev->event_data.id.e.euid = task->euid; + ev->event_data.id.r.ruid = task->cred->uid; + ev->event_data.id.e.euid = task->cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->gid; - ev->event_data.id.e.egid = task->egid; + ev->event_data.id.r.rgid = task->cred->gid; + ev->event_data.id.e.egid = task->cred->egid; } else return; get_seq(&msg->seq, &ev->cpu); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8fcfa398d350..7a52477ce493 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->uid); - NEW_AUX_ENT(AT_EUID, tsk->euid); - NEW_AUX_ENT(AT_GID, tsk->gid); - NEW_AUX_ENT(AT_EGID, tsk->egid); + NEW_AUX_ENT(AT_UID, tsk->cred->uid); + NEW_AUX_ENT(AT_EUID, tsk->cred->euid); + NEW_AUX_ENT(AT_GID, tsk->cred->gid); + NEW_AUX_ENT(AT_EGID, tsk->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { @@ -1388,8 +1388,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 488584c87512..9f67054c2c4e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current_uid()); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current_euid()); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current_gid()); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current_egid()); + NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); @@ -1440,8 +1440,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/exec.c b/fs/exec.c index 604834f3b208..31149e430a89 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1738,7 +1738,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->fsuid = 0; /* Dump root private */ + current->cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); @@ -1834,7 +1834,7 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->fsuid = fsuid; + current->cred->fsuid = fsuid; coredump_finish(mm); fail: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index bf049a805e59..63964d863ad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,8 +401,8 @@ static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { return (((fown->euid == 0) || - (fown->euid == p->suid) || (fown->euid == p->uid) || - (fown->uid == p->suid) || (fown->uid == p->uid)) && + (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || + (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); } diff --git a/fs/file_table.c b/fs/file_table.c index 5ad0eca6eea2..3152b53cfab0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -122,8 +122,8 @@ struct file *get_empty_filp(void) INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->fsuid; - f->f_gid = tsk->fsgid; + f->f_uid = tsk->cred->fsuid; + f->f_gid = tsk->cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index fd03330cadeb..e97a98981862 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -872,12 +872,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->euid == fc->user_id && - task->suid == fc->user_id && - task->uid == fc->user_id && - task->egid == fc->group_id && - task->sgid == fc->group_id && - task->gid == fc->group_id) + if (task->cred->euid == fc->user_id && + task->cred->suid == fc->user_id && + task->cred->uid == fc->user_id && + task->cred->egid == fc->group_id && + task->cred->sgid == fc->group_id && + task->cred->gid == fc->group_id) return 1; return 0; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 08ad76c79b49..870a721b8bd2 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -958,7 +958,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->user)) + if (!user_shm_lock(size, current->cred->user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +998,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->user); + user_shm_unlock(size, current->cred->user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c index 68d2cd807118..bb5210af77c2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) int err; struct io_context *ioc; - if (task->uid != current_euid() && - task->uid != current_uid() && !capable(CAP_SYS_NICE)) + if (task->cred->uid != current_euid() && + task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) return -EPERM; err = security_task_setioprio(task, ioprio); @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -131,7 +131,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->uid != who) + if (p->cred->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->uid != user->uid) + if (p->cred->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 294992e9bf69..808fc03a6fbd 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,6 +27,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { + struct cred *act_as = current->cred ; struct svc_cred cred = rqstp->rq_cred; int i; int flags = nfsexp_flags(rqstp, exp); @@ -55,25 +56,26 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + act_as->fsuid = cred.cr_uid; else - current->fsuid = exp->ex_anon_uid; + act_as->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; + act_as->fsgid = cred.cr_gid; else - current->fsgid = exp->ex_anon_gid; + act_as->fsgid = exp->ex_anon_gid; if (!cred.cr_group_info) return -ENOMEM; - ret = set_current_groups(cred.cr_group_info); + ret = set_groups(act_as, cred.cr_group_info); put_group_info(cred.cr_group_info); if ((cred.cr_uid)) { - current->cap_effective = - cap_drop_nfsd_set(current->cap_effective); + act_as->cap_effective = + cap_drop_nfsd_set(act_as->cap_effective); } else { - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + act_as->cap_effective = + cap_raise_nfsd_set(act_as->cap_effective, + act_as->cap_permitted); } return ret; } + diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bb93946ace22..a5e14e8695ea 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -57,17 +57,17 @@ static int rec_dir_init = 0; static void nfs4_save_user(uid_t *saveuid, gid_t *savegid) { - *saveuid = current->fsuid; - *savegid = current->fsgid; - current->fsuid = 0; - current->fsgid = 0; + *saveuid = current->cred->fsuid; + *savegid = current->cred->fsgid; + current->cred->fsuid = 0; + current->cred->fsgid = 0; } static void nfs4_reset_user(uid_t saveuid, gid_t savegid) { - current->fsuid = saveuid; - current->fsgid = savegid; + current->cred->fsuid = saveuid; + current->cred->fsgid = savegid; } static void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cd25d91895a1..e67cfaea0865 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. */ - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + current->cred->cap_effective = + cap_raise_nfsd_set(current->cred->cap_effective, + current->cred->cap_permitted); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index 500cc0c54762..b1238e195e7e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,6 +425,7 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { + struct cred *cred = current->cred; struct path path; struct inode *inode; int old_fsuid, old_fsgid; @@ -434,18 +435,18 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = current->fsuid; - old_fsgid = current->fsgid; + old_fsuid = cred->fsuid; + old_fsgid = cred->fsgid; - current->fsuid = current->uid; - current->fsgid = current->gid; + cred->fsuid = cred->uid; + cred->fsgid = cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->uid) + if (current->cred->uid) old_cap = cap_set_effective(__cap_empty_set); else - old_cap = cap_set_effective(current->cap_permitted); + old_cap = cap_set_effective(cred->cap_permitted); } res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); @@ -484,8 +485,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - current->fsuid = old_fsuid; - current->fsgid = old_fsgid; + cred->fsuid = old_fsuid; + cred->fsgid = old_fsgid; if (!issecure(SECURE_NO_SETUID_FIXUP)) cap_set_effective(old_cap); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6af7fba7abb1..62fe9b2009b6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -182,8 +182,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid); + p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, + p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); task_lock(p); if (p->files) @@ -194,7 +194,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->group_info; + group_info = p->cred->group_info; get_group_info(group_info); task_unlock(p); @@ -262,7 +262,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->user->sigpending); + qsize = atomic_read(&p->cred->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,10 +293,12 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - render_cap_t(m, "CapInh:\t", &p->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &p->cap_permitted); - render_cap_t(m, "CapEff:\t", &p->cap_effective); - render_cap_t(m, "CapBnd:\t", &p->cap_bset); + struct cred *cred = p->cred; + + render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); + render_cap_t(m, "CapEff:\t", &cred->cap_effective); + render_cap_t(m, "CapBnd:\t", &cred->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..6862b360c36c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1428,8 +1428,8 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } security_task_to_inode(task, inode); @@ -1454,8 +1454,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->euid; - stat->gid = task->egid; + stat->uid = task->cred->euid; + stat->gid = task->cred->egid; } } rcu_read_unlock(); @@ -1486,8 +1486,8 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1658,8 +1658,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 293043a5573a..8c022cd0ad67 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -23,11 +23,9 @@ /* * Credentials */ -typedef struct cred { - /* EMPTY */ -} cred_t; +typedef const struct cred cred_t; -extern struct cred *sys_cred; +extern cred_t *sys_cred; /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ static inline int capable_cred(cred_t *cr, int cid) diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee8..6eda8a3eb6f1 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,6 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; +extern cred_t *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1420c49674d7..6be310d41daf 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -497,7 +497,7 @@ int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, - xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, + xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode_core *); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96bec54..7b0c2ab88333 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -16,7 +16,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); + cred_t *credp); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -28,24 +28,24 @@ int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, - xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); + xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, - mode_t mode, struct xfs_inode **ipp, struct cred *credp); + mode_t mode, struct xfs_inode **ipp, cred_t *credp); int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, mode_t mode, struct xfs_inode **ipp, - struct cred *credp); + cred_t *credp); int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + cred_t *credp, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); diff --git a/include/linux/cred.h b/include/linux/cred.h index b69222cc1fd2..3e65587a72e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,39 +12,150 @@ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H -#define get_current_user() (get_uid(current->user)) - -#define task_uid(task) ((task)->uid) -#define task_gid(task) ((task)->gid) -#define task_euid(task) ((task)->euid) -#define task_egid(task) ((task)->egid) - -#define current_uid() (current->uid) -#define current_gid() (current->gid) -#define current_euid() (current->euid) -#define current_egid() (current->egid) -#define current_suid() (current->suid) -#define current_sgid() (current->sgid) -#define current_fsuid() (current->fsuid) -#define current_fsgid() (current->fsgid) -#define current_cap() (current->cap_effective) +#include +#include +#include + +struct user_struct; +struct cred; + +/* + * COW Supplementary groups list + */ +#define NGROUPS_SMALL 32 +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) + +struct group_info { + atomic_t usage; + int ngroups; + int nblocks; + gid_t small_block[NGROUPS_SMALL]; + gid_t *blocks[0]; +}; + +/** + * get_group_info - Get a reference to a group info structure + * @group_info: The group info to reference + * + * This must be called with the owning task locked (via task_lock()) when task + * != current. The reason being that the vast majority of callers are looking + * at current->group_info, which can not be changed except by the current task. + * Changing current->group_info requires the task lock, too. + */ +#define get_group_info(group_info) \ +do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +/** + * put_group_info - Release a reference to a group info structure + * @group_info: The group info to release + */ +#define put_group_info(group_info) \ +do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +extern struct group_info *groups_alloc(int); +extern void groups_free(struct group_info *); +extern int set_current_groups(struct group_info *); +extern int set_groups(struct cred *, struct group_info *); +extern int groups_search(struct group_info *, gid_t); + +/* access the groups "array" with this macro */ +#define GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) + +extern int in_group_p(gid_t); +extern int in_egroup_p(gid_t); + +/* + * The security context of a task + * + * The parts of the context break down into two categories: + * + * (1) The objective context of a task. These parts are used when some other + * task is attempting to affect this one. + * + * (2) The subjective context. These details are used when the task is acting + * upon another object, be that a file, a task, a key or whatever. + * + * Note that some members of this structure belong to both categories - the + * LSM security pointer for instance. + * + * A task has two security pointers. task->real_cred points to the objective + * context that defines that task's actual details. The objective part of this + * context is used whenever that task is acted upon. + * + * task->cred points to the subjective context that defines the details of how + * that task is going to act upon another object. This may be overridden + * temporarily to point to another security context, but normally points to the + * same context as task->real_cred. + */ +struct cred { + atomic_t usage; + uid_t uid; /* real UID of the task */ + gid_t gid; /* real GID of the task */ + uid_t suid; /* saved UID of the task */ + gid_t sgid; /* saved GID of the task */ + uid_t euid; /* effective UID of the task */ + gid_t egid; /* effective GID of the task */ + uid_t fsuid; /* UID for VFS ops */ + gid_t fsgid; /* GID for VFS ops */ + unsigned securebits; /* SUID-less security management */ + kernel_cap_t cap_inheritable; /* caps our children can inherit */ + kernel_cap_t cap_permitted; /* caps we're permitted */ + kernel_cap_t cap_effective; /* caps we can actually use */ + kernel_cap_t cap_bset; /* capability bounding set */ +#ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested + * keys to */ + struct key *thread_keyring; /* keyring private to this thread */ + struct key *request_key_auth; /* assumed request_key authority */ +#endif +#ifdef CONFIG_SECURITY + void *security; /* subjective LSM security */ +#endif + struct user_struct *user; /* real user ID subscription */ + struct group_info *group_info; /* supplementary groups for euid/fsgid */ + struct rcu_head rcu; /* RCU deletion hook */ + spinlock_t lock; /* lock for pointer changes */ +}; + +#define get_current_user() (get_uid(current->cred->user)) + +#define task_uid(task) ((task)->cred->uid) +#define task_gid(task) ((task)->cred->gid) +#define task_euid(task) ((task)->cred->euid) +#define task_egid(task) ((task)->cred->egid) + +#define current_uid() (current->cred->uid) +#define current_gid() (current->cred->gid) +#define current_euid() (current->cred->euid) +#define current_egid() (current->cred->egid) +#define current_suid() (current->cred->suid) +#define current_sgid() (current->cred->sgid) +#define current_fsuid() (current->cred->fsuid) +#define current_fsgid() (current->cred->fsgid) +#define current_cap() (current->cred->cap_effective) #define current_uid_gid(_uid, _gid) \ do { \ - *(_uid) = current->uid; \ - *(_gid) = current->gid; \ + *(_uid) = current->cred->uid; \ + *(_gid) = current->cred->gid; \ } while(0) #define current_euid_egid(_uid, _gid) \ do { \ - *(_uid) = current->euid; \ - *(_gid) = current->egid; \ + *(_uid) = current->cred->euid; \ + *(_gid) = current->cred->egid; \ } while(0) #define current_fsuid_fsgid(_uid, _gid) \ do { \ - *(_uid) = current->fsuid; \ - *(_gid) = current->fsgid; \ + *(_uid) = current->cred->fsuid; \ + *(_gid) = current->cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e5..9de41ccd67b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,21 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +extern struct cred init_cred; + +#define INIT_CRED(p) \ +{ \ + .usage = ATOMIC_INIT(3), \ + .securebits = SECUREBITS_DEFAULT, \ + .cap_inheritable = CAP_INIT_INH_SET, \ + .cap_permitted = CAP_FULL_SET, \ + .cap_effective = CAP_INIT_EFF_SET, \ + .cap_bset = CAP_INIT_BSET, \ + .user = INIT_USER, \ + .group_info = &init_groups, \ + .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ +} + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -147,13 +162,8 @@ extern struct group_info init_groups; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .group_info = &init_groups, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .securebits = SECUREBITS_DEFAULT, \ - .user = INIT_USER, \ + .__temp_cred = INIT_CRED(tsk.__temp_cred), \ + .cred = &tsk.__temp_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c8b92502354d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -660,6 +660,7 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) + struct backing_dev_info; struct reclaim_state; @@ -883,38 +884,7 @@ partition_sched_domains(int ndoms_new, cpumask_t *doms_new, #endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -struct group_info { - int ngroups; - atomic_t usage; - gid_t small_block[NGROUPS_SMALL]; - int nblocks; - gid_t *blocks[0]; -}; - -/* - * get_group_info() must be called with the owning task locked (via task_lock()) - * when task != current. The reason being that the vast majority of callers are - * looking at current->group_info, which can not be changed except by the - * current task. Changing current->group_info requires the task lock, too. - */ -#define get_group_info(group_info) do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) -#define put_group_info(group_info) do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ - groups_free(group_info); \ -} while (0) - -extern struct group_info *groups_alloc(int gidsetsize); -extern void groups_free(struct group_info *group_info); -extern int set_current_groups(struct group_info *group_info); -extern int groups_search(struct group_info *group_info, gid_t grp); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); @@ -1181,17 +1151,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - struct user_struct *user; - unsigned securebits; -#ifdef CONFIG_KEYS - unsigned char jit_keyring; /* default keyring to attach requested keys to */ - struct key *request_key_auth; /* assumed request_key authority */ - struct key *thread_keyring; /* keyring private to this thread */ -#endif + struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ + struct cred *cred; /* actual/objective task credentials */ + char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -1228,9 +1190,6 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; -#ifdef CONFIG_SECURITY - void *security; -#endif struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL uid_t loginuid; @@ -1787,9 +1746,6 @@ extern void wake_up_new_task(struct task_struct *tsk, extern void sched_fork(struct task_struct *p, int clone_flags); extern void sched_dead(struct task_struct *p); -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); - extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 92f09bdf1175..6d389491bfa2 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->securebits) +#define issecure(X) (issecure_mask(X) & current->cred->securebits) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index abda5991d7e3..e1885b494bac 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -126,7 +126,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->user; + struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 0c3debbe32d5..264a9d33c5dd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->user; + shp->mlock_user = current->cred->user; } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct * user = current->user; + struct user_struct *user = current->cred->user; if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9c7e47ae4576..2febf5165fad 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,6 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { + struct cred *cred = tsk->cred; int i, j, need_sid = 1; u32 sid; @@ -466,28 +467,28 @@ static int audit_filter_rules(struct task_struct *tsk, } break; case AUDIT_UID: - result = audit_comparator(tsk->uid, f->op, f->val); + result = audit_comparator(cred->uid, f->op, f->val); break; case AUDIT_EUID: - result = audit_comparator(tsk->euid, f->op, f->val); + result = audit_comparator(cred->euid, f->op, f->val); break; case AUDIT_SUID: - result = audit_comparator(tsk->suid, f->op, f->val); + result = audit_comparator(cred->suid, f->op, f->val); break; case AUDIT_FSUID: - result = audit_comparator(tsk->fsuid, f->op, f->val); + result = audit_comparator(cred->fsuid, f->op, f->val); break; case AUDIT_GID: - result = audit_comparator(tsk->gid, f->op, f->val); + result = audit_comparator(cred->gid, f->op, f->val); break; case AUDIT_EGID: - result = audit_comparator(tsk->egid, f->op, f->val); + result = audit_comparator(cred->egid, f->op, f->val); break; case AUDIT_SGID: - result = audit_comparator(tsk->sgid, f->op, f->val); + result = audit_comparator(cred->sgid, f->op, f->val); break; case AUDIT_FSGID: - result = audit_comparator(tsk->fsgid, f->op, f->val); + result = audit_comparator(cred->fsgid, f->op, f->val); break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); @@ -1228,6 +1229,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { + struct cred *cred = tsk->cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1237,14 +1239,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if (!context->ppid) context->ppid = sys_getppid(); - context->uid = tsk->uid; - context->gid = tsk->gid; - context->euid = tsk->euid; - context->suid = tsk->suid; - context->fsuid = tsk->fsuid; - context->egid = tsk->egid; - context->sgid = tsk->sgid; - context->fsgid = tsk->fsgid; + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; + context->fsuid = cred->fsuid; + context->egid = cred->egid; + context->sgid = cred->sgid; + context->fsgid = cred->fsgid; context->personality = tsk->personality; ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); @@ -2086,7 +2088,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->uid, + task->pid, task->cred->uid, task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2469,7 +2471,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->uid; + context->target_uid = t->cred->uid; context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2495,7 +2497,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->uid; + audit_sig_uid = tsk->cred->uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2507,7 +2509,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->uid; + ctx->target_uid = t->cred->uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2528,7 +2530,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->uid; + axp->target_uid[axp->pid_count] = t->cred->uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); @@ -2575,12 +2577,12 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cap_inheritable; + ax->old_pcap.inheritable = current->cred->cap_inheritable; ax->old_pcap.effective = *pE; - ax->new_pcap.permitted = current->cap_permitted; - ax->new_pcap.inheritable = current->cap_inheritable; - ax->new_pcap.effective = current->cap_effective; + ax->new_pcap.permitted = current->cred->cap_permitted; + ax->new_pcap.inheritable = current->cred->cap_inheritable; + ax->new_pcap.effective = current->cred->cap_effective; } /** diff --git a/kernel/capability.c b/kernel/capability.c index 58b00519624a..a404b980b1bd 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -171,8 +171,8 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) spin_lock(&task_capability_lock); - pE_old = current->cap_effective; - current->cap_effective = pE_new; + pE_old = current->cred->cap_effective; + current->cred->cap_effective = pE_new; spin_unlock(&task_capability_lock); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 78f9b310c4f3..e210526e6401 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1293,7 +1293,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); euid = current_euid(); - if (euid && euid != tsk->uid && euid != tsk->suid) { + if (euid && + euid != tsk->cred->uid && + euid != tsk->cred->suid) { put_task_struct(tsk); return -EACCES; } diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d9467..e0f6e1892fb9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,7 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->user->processes); + atomic_dec(&p->cred->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1272,7 +1272,7 @@ static int wait_task_zombie(struct task_struct *p, int options, return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->uid; + uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1393,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->uid, &infop->si_uid); + retval = put_user(p->cred->uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1458,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->uid; + uid = p->cred->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1535,7 +1535,7 @@ static int wait_task_continued(struct task_struct *p, int options, spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->uid; + uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..81fdc7733908 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -147,8 +147,8 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(tsk == current); security_task_free(tsk); - free_uid(tsk->user); - put_group_info(tsk->group_info); + free_uid(tsk->__temp_cred.user); + put_group_info(tsk->__temp_cred.group_info); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,17 +969,18 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + p->cred = &p->__temp_cred; retval = -EAGAIN; - if (atomic_read(&p->user->processes) >= + if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != current->nsproxy->user_ns->root_user) + p->cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } - atomic_inc(&p->user->__count); - atomic_inc(&p->user->processes); - get_group_info(p->group_info); + atomic_inc(&p->cred->user->__count); + atomic_inc(&p->cred->user->processes); + get_group_info(p->cred->group_info); /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); #ifdef CONFIG_SECURITY - p->security = NULL; + p->cred->security = NULL; #endif - p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1298,9 +1298,9 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->group_info); - atomic_dec(&p->user->processes); - free_uid(p->user); + put_group_info(p->cred->group_info); + atomic_dec(&p->cred->user->processes); + free_uid(p->cred->user); bad_fork_free: free_task(p); fork_out: diff --git a/kernel/futex.c b/kernel/futex.c index e06962132aaf..28421d8210b8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -443,7 +443,8 @@ static struct task_struct * futex_find_get_task(pid_t pid) rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->euid && euid != p->uid)) + if (!p || (euid != p->cred->euid && + euid != p->cred->uid)) p = ERR_PTR(-ESRCH); else get_task_struct(p); @@ -1846,7 +1847,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && + if (euid != p->cred->euid && + euid != p->cred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 3254d4e41e88..2c3fd5ed34f5 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && - !capable(CAP_SYS_PTRACE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; read_unlock(&tasklist_lock); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 937f6b5b2008..49849d12dd12 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,6 +115,8 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { + struct cred *cred = current->cred, *tcred = task->cred; + /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. @@ -123,19 +125,18 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - uid_t uid; - gid_t gid; + uid_t uid = cred->uid; + gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - current_uid_gid(&uid, &gid); - if ((uid != task->euid || - uid != task->suid || - uid != task->uid || - gid != task->egid || - gid != task->sgid || - gid != task->gid) && !capable(CAP_SYS_PTRACE)) + if ((uid != tcred->euid || + uid != tcred->suid || + uid != tcred->uid || + gid != tcred->egid || + gid != tcred->sgid || + gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); if (task->mm) diff --git a/kernel/sched.c b/kernel/sched.c index c3b8b1fcde0d..733c59e645aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,7 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->user->tg; + tg = p->cred->user->tg; #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5182,8 +5182,8 @@ recheck: /* can't change other user's priorities */ euid = current_euid(); - if (euid != p->euid && - euid != p->uid) + if (euid != p->cred->euid && + euid != p->cred->uid) return -EPERM; } @@ -5417,7 +5417,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) euid = current_euid(); retval = -EPERM; - if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 167b535fe1a9..80e8a6489f97 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -187,7 +187,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * In order to avoid problems with "switch_user()", we want to make * sure that the compiler doesn't re-load "t->user" */ - user = t->user; + user = t->cred->user; barrier(); atomic_inc(&user->sigpending); if (override_rlimit || @@ -582,8 +582,8 @@ static int check_kill_permission(int sig, struct siginfo *info, uid = current_uid(); euid = current_euid(); - if ((euid ^ t->suid) && (euid ^ t->uid) && - (uid ^ t->suid) && (uid ^ t->uid) && + if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && + (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1100,8 +1100,8 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, goto out_unlock; } if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->suid) && (euid != p->uid) - && (uid != p->suid) && (uid != p->uid)) { + && (euid != p->cred->suid) && (euid != p->cred->uid) + && (uid != p->cred->suid) && (uid != p->cred->uid)) { ret = -EPERM; goto out_unlock; } @@ -1374,7 +1374,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); @@ -1445,7 +1445,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1713,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->uid; + info->si_uid = current->parent->cred->uid; } /* If the (new) signal is now blocked, requeue it. */ diff --git a/kernel/sys.c b/kernel/sys.c index ed5c29c748ac..5d81f07c0150 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,7 +117,9 @@ static int set_one_prio(struct task_struct *p, int niceval, int error) uid_t euid = current_euid(); int no_nice; - if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) { + if (p->cred->uid != euid && + p->cred->euid != euid && + !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } @@ -174,7 +176,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -182,7 +184,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) + if (p->cred->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); if (who != current_uid()) @@ -236,7 +238,7 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -244,7 +246,7 @@ asmlinkage long sys_getpriority(int which, int who) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) { + if (p->cred->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; @@ -472,8 +474,9 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - int old_rgid = current->gid; - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_rgid = cred->gid; + int old_egid = cred->egid; int new_rgid = old_rgid; int new_egid = old_egid; int retval; @@ -484,7 +487,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) if (rgid != (gid_t) -1) { if ((old_rgid == rgid) || - (current->egid==rgid) || + (cred->egid == rgid) || capable(CAP_SETGID)) new_rgid = rgid; else @@ -492,8 +495,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (egid != (gid_t) -1) { if ((old_rgid == egid) || - (current->egid == egid) || - (current->sgid == egid) || + (cred->egid == egid) || + (cred->sgid == egid) || capable(CAP_SETGID)) new_egid = egid; else @@ -505,10 +508,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && egid != old_rgid)) - current->sgid = new_egid; - current->fsgid = new_egid; - current->egid = new_egid; - current->gid = new_rgid; + cred->sgid = new_egid; + cred->fsgid = new_egid; + cred->egid = new_egid; + cred->gid = new_rgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); return 0; @@ -521,7 +524,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_egid = cred->egid; int retval; retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); @@ -533,13 +537,13 @@ asmlinkage long sys_setgid(gid_t gid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->gid = current->egid = current->sgid = current->fsgid = gid; - } else if ((gid == current->gid) || (gid == current->sgid)) { + cred->gid = cred->egid = cred->sgid = cred->fsgid = gid; + } else if ((gid == cred->gid) || (gid == cred->sgid)) { if (old_egid != gid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = current->fsgid = gid; + cred->egid = cred->fsgid = gid; } else return -EPERM; @@ -570,7 +574,7 @@ static int set_user(uid_t new_ruid, int dumpclear) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->uid = new_ruid; + current->cred->uid = new_ruid; return 0; } @@ -591,6 +595,7 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { + struct cred *cred = current->cred; int old_ruid, old_euid, old_suid, new_ruid, new_euid; int retval; @@ -598,14 +603,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (retval) return retval; - new_ruid = old_ruid = current->uid; - new_euid = old_euid = current->euid; - old_suid = current->suid; + new_ruid = old_ruid = cred->uid; + new_euid = old_euid = cred->euid; + old_suid = cred->suid; if (ruid != (uid_t) -1) { new_ruid = ruid; if ((old_ruid != ruid) && - (current->euid != ruid) && + (cred->euid != ruid) && !capable(CAP_SETUID)) return -EPERM; } @@ -613,8 +618,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (euid != (uid_t) -1) { new_euid = euid; if ((old_ruid != euid) && - (current->euid != euid) && - (current->suid != euid) && + (cred->euid != euid) && + (cred->suid != euid) && !capable(CAP_SETUID)) return -EPERM; } @@ -626,11 +631,11 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = new_euid; + cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old_ruid)) - current->suid = current->euid; - current->fsuid = current->euid; + cred->suid = cred->euid; + cred->fsuid = cred->euid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -653,7 +658,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - int old_euid = current->euid; + struct cred *cred = current->cred; + int old_euid = cred->euid; int old_ruid, old_suid, new_suid; int retval; @@ -661,23 +667,23 @@ asmlinkage long sys_setuid(uid_t uid) if (retval) return retval; - old_ruid = current->uid; - old_suid = current->suid; + old_ruid = cred->uid; + old_suid = cred->suid; new_suid = old_suid; if (capable(CAP_SETUID)) { if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) return -EAGAIN; new_suid = uid; - } else if ((uid != current->uid) && (uid != new_suid)) + } else if ((uid != cred->uid) && (uid != new_suid)) return -EPERM; if (old_euid != uid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = uid; - current->suid = new_suid; + cred->fsuid = cred->euid = uid; + cred->suid = new_suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -692,9 +698,10 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - int old_ruid = current->uid; - int old_euid = current->euid; - int old_suid = current->suid; + struct cred *cred = current->cred; + int old_ruid = cred->uid; + int old_euid = cred->euid; + int old_suid = cred->suid; int retval; retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); @@ -702,30 +709,31 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return retval; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != current->uid) && - (ruid != current->euid) && (ruid != current->suid)) + if ((ruid != (uid_t) -1) && (ruid != cred->uid) && + (ruid != cred->euid) && (ruid != cred->suid)) return -EPERM; - if ((euid != (uid_t) -1) && (euid != current->uid) && - (euid != current->euid) && (euid != current->suid)) + if ((euid != (uid_t) -1) && (euid != cred->uid) && + (euid != cred->euid) && (euid != cred->suid)) return -EPERM; - if ((suid != (uid_t) -1) && (suid != current->uid) && - (suid != current->euid) && (suid != current->suid)) + if ((suid != (uid_t) -1) && (suid != cred->uid) && + (suid != cred->euid) && (suid != cred->suid)) return -EPERM; } if (ruid != (uid_t) -1) { - if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) + if (ruid != cred->uid && + set_user(ruid, euid != cred->euid) < 0) return -EAGAIN; } if (euid != (uid_t) -1) { - if (euid != current->euid) { + if (euid != cred->euid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->euid = euid; + cred->euid = euid; } - current->fsuid = current->euid; + cred->fsuid = cred->euid; if (suid != (uid_t) -1) - current->suid = suid; + cred->suid = suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -735,11 +743,12 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->uid, ruid)) && - !(retval = put_user(current->euid, euid))) - retval = put_user(current->suid, suid); + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) + retval = put_user(cred->suid, suid); return retval; } @@ -749,6 +758,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { + struct cred *cred = current->cred; int retval; retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); @@ -756,28 +766,28 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return retval; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != current->gid) && - (rgid != current->egid) && (rgid != current->sgid)) + if ((rgid != (gid_t) -1) && (rgid != cred->gid) && + (rgid != cred->egid) && (rgid != cred->sgid)) return -EPERM; - if ((egid != (gid_t) -1) && (egid != current->gid) && - (egid != current->egid) && (egid != current->sgid)) + if ((egid != (gid_t) -1) && (egid != cred->gid) && + (egid != cred->egid) && (egid != cred->sgid)) return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != current->gid) && - (sgid != current->egid) && (sgid != current->sgid)) + if ((sgid != (gid_t) -1) && (sgid != cred->gid) && + (sgid != cred->egid) && (sgid != cred->sgid)) return -EPERM; } if (egid != (gid_t) -1) { - if (egid != current->egid) { + if (egid != cred->egid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = egid; + cred->egid = egid; } - current->fsgid = current->egid; + cred->fsgid = cred->egid; if (rgid != (gid_t) -1) - current->gid = rgid; + cred->gid = rgid; if (sgid != (gid_t) -1) - current->sgid = sgid; + cred->sgid = sgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); @@ -786,11 +796,12 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->gid, rgid)) && - !(retval = put_user(current->egid, egid))) - retval = put_user(current->sgid, sgid); + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) + retval = put_user(cred->sgid, sgid); return retval; } @@ -804,20 +815,21 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { + struct cred *cred = current->cred; int old_fsuid; - old_fsuid = current->fsuid; + old_fsuid = cred->fsuid; if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) return old_fsuid; - if (uid == current->uid || uid == current->euid || - uid == current->suid || uid == current->fsuid || + if (uid == cred->uid || uid == cred->euid || + uid == cred->suid || uid == cred->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = uid; + cred->fsuid = uid; } key_fsuid_changed(current); @@ -833,20 +845,21 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long sys_setfsgid(gid_t gid) { + struct cred *cred = current->cred; int old_fsgid; - old_fsgid = current->fsgid; + old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) return old_fsgid; - if (gid == current->gid || gid == current->egid || - gid == current->sgid || gid == current->fsgid || + if (gid == cred->gid || gid == cred->egid || + gid == cred->sgid || gid == cred->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsgid = gid; + cred->fsgid = gid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); } @@ -1208,8 +1221,15 @@ int groups_search(struct group_info *group_info, gid_t grp) return 0; } -/* validate and set current->group_info */ -int set_current_groups(struct group_info *group_info) +/** + * set_groups - Change a group subscription in a security record + * @sec: The security record to alter + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon a task security + * record. + */ +int set_groups(struct cred *cred, struct group_info *group_info) { int retval; struct group_info *old_info; @@ -1221,20 +1241,34 @@ int set_current_groups(struct group_info *group_info) groups_sort(group_info); get_group_info(group_info); - task_lock(current); - old_info = current->group_info; - current->group_info = group_info; - task_unlock(current); + spin_lock(&cred->lock); + old_info = cred->group_info; + cred->group_info = group_info; + spin_unlock(&cred->lock); put_group_info(old_info); - return 0; } +EXPORT_SYMBOL(set_groups); + +/** + * set_current_groups - Change current's group subscription + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon current's task + * security record. + */ +int set_current_groups(struct group_info *group_info) +{ + return set_groups(current->cred, group_info); +} + EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { + struct cred *cred = current->cred; int i = 0; /* @@ -1246,13 +1280,13 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) return -EINVAL; /* no need to grab task_lock here; it cannot change */ - i = current->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups_to_user(grouplist, current->group_info)) { + if (groups_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } @@ -1296,9 +1330,10 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->fsgid) - retval = groups_search(current->group_info, grp); + if (grp != cred->fsgid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1306,9 +1341,10 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->egid) - retval = groups_search(current->group_info, grp); + if (grp != cred->egid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1624,7 +1660,9 @@ asmlinkage long sys_umask(int mask) asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - long error = 0; + struct task_struct *me = current; + unsigned char comm[sizeof(me->comm)]; + long error; if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) return error; @@ -1635,39 +1673,41 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = -EINVAL; break; } - current->pdeath_signal = arg2; + me->pdeath_signal = arg2; + error = 0; break; case PR_GET_PDEATHSIG: - error = put_user(current->pdeath_signal, (int __user *)arg2); + error = put_user(me->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = get_dumpable(current->mm); + error = get_dumpable(me->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - set_dumpable(current->mm, arg2); + set_dumpable(me->mm, arg2); + error = 0; break; case PR_SET_UNALIGN: - error = SET_UNALIGN_CTL(current, arg2); + error = SET_UNALIGN_CTL(me, arg2); break; case PR_GET_UNALIGN: - error = GET_UNALIGN_CTL(current, arg2); + error = GET_UNALIGN_CTL(me, arg2); break; case PR_SET_FPEMU: - error = SET_FPEMU_CTL(current, arg2); + error = SET_FPEMU_CTL(me, arg2); break; case PR_GET_FPEMU: - error = GET_FPEMU_CTL(current, arg2); + error = GET_FPEMU_CTL(me, arg2); break; case PR_SET_FPEXC: - error = SET_FPEXC_CTL(current, arg2); + error = SET_FPEXC_CTL(me, arg2); break; case PR_GET_FPEXC: - error = GET_FPEXC_CTL(current, arg2); + error = GET_FPEXC_CTL(me, arg2); break; case PR_GET_TIMING: error = PR_TIMING_STATISTICAL; @@ -1675,33 +1715,28 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; + else + error = 0; break; - case PR_SET_NAME: { - struct task_struct *me = current; - unsigned char ncomm[sizeof(me->comm)]; - - ncomm[sizeof(me->comm)-1] = 0; - if (strncpy_from_user(ncomm, (char __user *)arg2, - sizeof(me->comm)-1) < 0) + case PR_SET_NAME: + comm[sizeof(me->comm)-1] = 0; + if (strncpy_from_user(comm, (char __user *)arg2, + sizeof(me->comm) - 1) < 0) return -EFAULT; - set_task_comm(me, ncomm); + set_task_comm(me, comm); return 0; - } - case PR_GET_NAME: { - struct task_struct *me = current; - unsigned char tcomm[sizeof(me->comm)]; - - get_task_comm(tcomm, me); - if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm))) + case PR_GET_NAME: + get_task_comm(comm, me); + if (copy_to_user((char __user *)arg2, comm, + sizeof(comm))) return -EFAULT; return 0; - } case PR_GET_ENDIAN: - error = GET_ENDIAN(current, arg2); + error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: - error = SET_ENDIAN(current, arg2); + error = SET_ENDIAN(me, arg2); break; case PR_GET_SECCOMP: @@ -1725,6 +1760,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, current->default_timer_slack_ns; else current->timer_slack_ns = arg2; + error = 0; break; default: error = -EINVAL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f9171..5c97c5b4ea8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -246,7 +246,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(data->comm, tsk->comm, TASK_COMM_LEN); data->pid = tsk->pid; - data->uid = tsk->uid; + data->uid = task_uid(tsk); data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; data->policy = tsk->policy; data->rt_priority = tsk->rt_priority; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 8ebcd8532dfb..6d1ed07bf312 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -53,8 +53,8 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->uid; - stats->ac_gid = tsk->gid; + stats->ac_uid = tsk->cred->uid; + stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); stats->ac_ppid = pid_alive(tsk) ? diff --git a/kernel/uid16.c b/kernel/uid16.c index 3e41c1673e2f..71f07fc39fea 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -86,9 +86,9 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -106,9 +106,9 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -166,20 +166,20 @@ asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -210,20 +210,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..104d22ac84d5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -457,11 +457,11 @@ void switch_uid(struct user_struct *new_user) * cheaply with the new uid cache, so if it matters * we should be checking for it. -DaveM */ - old_user = current->user; + old_user = current->cred->user; atomic_inc(&new_user->processes); atomic_dec(&old_user->processes); switch_uid_keyring(new_user); - current->user = new_user; + current->cred->user = new_user; sched_switch_user(current); /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 07a96474077d..b23492ee3e50 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,12 +1110,12 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { + struct cred *cred, *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; nodemask_t new; nodemask_t task_nodes; - uid_t uid, euid; int err; err = get_nodes(&old, old_nodes, maxnode); @@ -1145,10 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 6263c24c4afe..794443da1b4f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,10 +1045,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { + struct cred *cred, *tcred; struct task_struct *task; struct mm_struct *mm; int err; - uid_t uid, euid; /* Check flags */ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) @@ -1076,10 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 34a458aa7997..3af787ba2077 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,7 +298,7 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->uid, p->tgid, p->mm->total_vm, + p->pid, p->cred->uid, p->tgid, p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, p->comm); task_unlock(p); diff --git a/net/core/scm.c b/net/core/scm.c index 4681d8f9b45b..c28ca32a7d93 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,11 +44,13 @@ static __inline__ int scm_check_creds(struct ucred *creds) { + struct cred *cred = current->cred; + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current_uid() || creds->uid == current_euid() || - creds->uid == current_suid()) || capable(CAP_SETUID)) && - ((creds->gid == current_gid() || creds->gid == current_egid() || - creds->gid == current_sgid()) || capable(CAP_SETGID))) { + ((creds->uid == cred->uid || creds->uid == cred->euid || + creds->uid == cred->suid) || capable(CAP_SETUID)) && + ((creds->gid == cred->gid || creds->gid == cred->egid || + creds->gid == cred->sgid) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8fc380578807..c79543212602 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -353,7 +353,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) struct auth_cred acred = { .uid = current_fsuid(), .gid = current_fsgid(), - .group_info = current->group_info, + .group_info = current->cred->group_info, }; struct rpc_cred *ret; diff --git a/security/commoncap.c b/security/commoncap.c index fb4e240720d8..fa61679f8c73 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -30,7 +30,7 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current->cap_effective; + NETLINK_CB(skb).eff_cap = current_cap(); return 0; } @@ -52,7 +52,7 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable(struct task_struct *tsk, int cap, int audit) { /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cap_effective, cap)) + if (cap_raised(tsk->cred->cap_effective, cap)) return 0; return -EPERM; } @@ -67,7 +67,8 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(child->cap_permitted, current->cap_permitted)) + if (cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted)) return 0; if (capable(CAP_SYS_PTRACE)) return 0; @@ -76,8 +77,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int cap_ptrace_traceme(struct task_struct *parent) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(current->cap_permitted, parent->cap_permitted)) + if (cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted)) return 0; if (has_capability(parent, CAP_SYS_PTRACE)) return 0; @@ -87,10 +88,12 @@ int cap_ptrace_traceme(struct task_struct *parent) int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { + struct cred *cred = target->cred; + /* Derived from kernel/capability.c:sys_capget. */ - *effective = target->cap_effective; - *inheritable = target->cap_inheritable; - *permitted = target->cap_permitted; + *effective = cred->cap_effective; + *inheritable = cred->cap_inheritable; + *permitted = cred->cap_permitted; return 0; } @@ -122,24 +125,26 @@ int cap_capset_check(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { + const struct cred *cred = current->cred; + if (cap_inh_is_capped() && !cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_permitted))) { + cap_combine(cred->cap_inheritable, + cred->cap_permitted))) { /* incapable of using this inheritable set */ return -EPERM; } if (!cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_bset))) { + cap_combine(cred->cap_inheritable, + cred->cap_bset))) { /* no new pI capabilities outside bounding set */ return -EPERM; } /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, - cap_combine (current->cap_permitted, - current->cap_permitted))) { + cap_combine (cred->cap_permitted, + cred->cap_permitted))) { return -EPERM; } @@ -155,9 +160,11 @@ void cap_capset_set(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - current->cap_effective = *effective; - current->cap_inheritable = *inheritable; - current->cap_permitted = *permitted; + struct cred *cred = current->cred; + + cred->cap_effective = *effective; + cred->cap_inheritable = *inheritable; + cred->cap_permitted = *permitted; } static inline void bprm_clear_caps(struct linux_binprm *bprm) @@ -211,8 +218,8 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) */ bprm->cap_post_exec_permitted.cap[i] = - (current->cap_bset.cap[i] & permitted) | - (current->cap_inheritable.cap[i] & inheritable); + (current->cred->cap_bset.cap[i] & permitted) | + (current->cred->cap_inheritable.cap[i] & inheritable); if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { /* @@ -354,8 +361,8 @@ int cap_bprm_set_security (struct linux_binprm *bprm) if (bprm->e_uid == 0 || current_uid() == 0) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ bprm->cap_post_exec_permitted = cap_combine( - current->cap_bset, current->cap_inheritable - ); + current->cred->cap_bset, + current->cred->cap_inheritable); bprm->cap_effective = (bprm->e_uid == 0); ret = 0; } @@ -366,44 +373,39 @@ int cap_bprm_set_security (struct linux_binprm *bprm) void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - kernel_cap_t pP = current->cap_permitted; - kernel_cap_t pE = current->cap_effective; - uid_t uid; - gid_t gid; + struct cred *cred = current->cred; - current_uid_gid(&uid, &gid); - - if (bprm->e_uid != uid || bprm->e_gid != gid || + if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || !cap_issubset(bprm->cap_post_exec_permitted, - current->cap_permitted)) { + cred->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - bprm->e_uid = uid; - bprm->e_gid = gid; + bprm->e_uid = cred->uid; + bprm->e_gid = cred->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - current->cap_permitted); + cred->cap_permitted); } } } - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + cred->suid = cred->euid = cred->fsuid = bprm->e_uid; + cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - current->cap_permitted = bprm->cap_post_exec_permitted; + cred->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - current->cap_effective = bprm->cap_post_exec_permitted; + cred->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(current->cap_effective); + cap_clear(cred->cap_effective); } /* @@ -418,27 +420,30 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. */ - if (!cap_isclear(current->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, current->cap_effective) || - (bprm->e_uid != 0) || (current->uid != 0) || + if (!cap_isclear(cred->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || + (bprm->e_uid != 0) || (cred->uid != 0) || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &pP, &pE); + audit_log_bprm_fcaps(bprm, &cred->cap_permitted, + &cred->cap_effective); } - current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); } int cap_bprm_secureexec (struct linux_binprm *bprm) { - if (current_uid() != 0) { + const struct cred *cred = current->cred; + + if (cred->uid != 0) { if (bprm->cap_effective) return 1; if (!cap_isclear(bprm->cap_post_exec_permitted)) return 1; } - return (current_euid() != current_uid() || - current_egid() != current_gid()); + return (cred->euid != cred->uid || + cred->egid != cred->gid); } int cap_inode_setxattr(struct dentry *dentry, const char *name, @@ -501,25 +506,27 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) static inline void cap_emulate_setxuid (int old_ruid, int old_euid, int old_suid) { - uid_t euid = current_euid(); + struct cred *cred = current->cred; if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (current_uid() != 0 && euid != 0 && current_suid() != 0) && + (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (current->cap_permitted); - cap_clear (current->cap_effective); + cap_clear (cred->cap_permitted); + cap_clear (cred->cap_effective); } - if (old_euid == 0 && euid != 0) { - cap_clear (current->cap_effective); + if (old_euid == 0 && cred->euid != 0) { + cap_clear (cred->cap_effective); } - if (old_euid != 0 && euid == 0) { - current->cap_effective = current->cap_permitted; + if (old_euid != 0 && cred->euid == 0) { + cred->cap_effective = cred->cap_permitted; } } int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags) { + struct cred *cred = current->cred; + switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: @@ -541,16 +548,16 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && current_fsuid() != 0) { - current->cap_effective = + if (old_fsuid == 0 && cred->fsuid != 0) { + cred->cap_effective = cap_drop_fs_set( - current->cap_effective); + cred->cap_effective); } - if (old_fsuid != 0 && current_fsuid() == 0) { - current->cap_effective = + if (old_fsuid != 0 && cred->fsuid == 0) { + cred->cap_effective = cap_raise_fs_set( - current->cap_effective, - current->cap_permitted); + cred->cap_effective, + cred->cap_permitted); } } break; @@ -575,7 +582,8 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cap_permitted, current->cap_permitted) && + if (!cap_issubset(p->cred->cap_permitted, + current->cred->cap_permitted) && !capable(CAP_SYS_NICE)) return -EPERM; return 0; @@ -610,7 +618,7 @@ static long cap_prctl_drop(unsigned long cap) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cap_bset, cap); + cap_lower(current->cred->cap_bset, cap); return 0; } @@ -633,6 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { + struct cred *cred = current->cred; long error = 0; switch (option) { @@ -640,7 +649,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!cap_valid(arg2)) error = -EINVAL; else - error = !!cap_raised(current->cap_bset, arg2); + error = !!cap_raised(cred->cap_bset, arg2); break; #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: @@ -667,9 +676,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege environment. */ case PR_SET_SECUREBITS: - if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) - & (current->securebits ^ arg2)) /*[1]*/ - || ((current->securebits & SECURE_ALL_LOCKS + if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) + & (cred->securebits ^ arg2)) /*[1]*/ + || ((cred->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ @@ -682,11 +691,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, */ error = -EPERM; /* cannot change a locked bit */ } else { - current->securebits = arg2; + cred->securebits = arg2; } break; case PR_GET_SECUREBITS: - error = current->securebits; + error = cred->securebits; break; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ @@ -701,10 +710,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, else if (issecure(SECURE_KEEP_CAPS_LOCKED)) error = -EPERM; else if (arg2) - current->securebits |= issecure_mask(SECURE_KEEP_CAPS); + cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - current->securebits &= - ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); break; default: @@ -719,11 +727,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, void cap_task_reparent_to_init (struct task_struct *p) { - cap_set_init_eff(p->cap_effective); - cap_clear(p->cap_inheritable); - cap_set_full(p->cap_permitted); - p->securebits = SECUREBITS_DEFAULT; - return; + struct cred *cred = p->cred; + + cap_set_init_eff(cred->cap_effective); + cap_clear(cred->cap_inheritable); + cap_set_full(cred->cap_permitted); + p->cred->securebits = SECUREBITS_DEFAULT; } int cap_syslog (int type) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index fcce331eca72..8833b447adef 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -889,7 +889,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -932,8 +932,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error2: @@ -960,7 +960,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -983,8 +983,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error: @@ -999,6 +999,7 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { + struct cred *cred = current->cred; int ret; switch (reqkey_defl) { @@ -1018,10 +1019,10 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: set: - current->jit_keyring = reqkey_defl; + cred->jit_keyring = reqkey_defl; case KEY_REQKEY_DEFL_NO_CHANGE: - return current->jit_keyring; + return cred->jit_keyring; case KEY_REQKEY_DEFL_GROUP_KEYRING: default: @@ -1086,8 +1087,8 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; ret = 0; goto error; } @@ -1103,8 +1104,8 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->request_key_auth); - current->request_key_auth = authkey; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = authkey; ret = authkey->serial; error: diff --git a/security/keys/permission.c b/security/keys/permission.c index 3b41f9b52537..baf3d5f31e71 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,6 +22,7 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct cred *cred = context->cred; struct key *key; key_perm_t kperm; int ret; @@ -29,7 +30,7 @@ int key_task_permission(const key_ref_t key_ref, key = key_ref_to_ptr(key_ref); /* use the second 8-bits of permissions for keys the caller owns */ - if (key->uid == context->fsuid) { + if (key->uid == cred->fsuid) { kperm = key->perm >> 16; goto use_these_perms; } @@ -37,14 +38,14 @@ int key_task_permission(const key_ref_t key_ref, /* use the third 8-bits of permissions for keys the caller has a group * membership in common with */ if (key->gid != -1 && key->perm & KEY_GRP_ALL) { - if (key->gid == context->fsgid) { + if (key->gid == cred->fsgid) { kperm = key->perm >> 8; goto use_these_perms; } - task_lock(context); - ret = groups_search(context->group_info, key->gid); - task_unlock(context); + spin_lock(&cred->lock); + ret = groups_search(cred->group_info, key->gid); + spin_unlock(&cred->lock); if (ret) { kperm = key->perm >> 8; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 1c793b7090a7..b0904cdda2e7 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,7 +42,7 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->user; + struct user_struct *user = current->cred->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; @@ -156,7 +156,7 @@ int install_thread_keyring(void) sprintf(buf, "_tid.%u", tsk->pid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -164,8 +164,8 @@ int install_thread_keyring(void) } task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = keyring; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = keyring; task_unlock(tsk); ret = 0; @@ -192,7 +192,7 @@ int install_process_keyring(void) if (!tsk->signal->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -238,7 +238,7 @@ static int install_session_keyring(struct key *keyring) if (tsk->signal->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -292,14 +292,14 @@ int copy_thread_group_keys(struct task_struct *tsk) */ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) { - key_check(tsk->thread_keyring); - key_check(tsk->request_key_auth); + key_check(tsk->cred->thread_keyring); + key_check(tsk->cred->request_key_auth); /* no thread keyring yet */ - tsk->thread_keyring = NULL; + tsk->cred->thread_keyring = NULL; /* copy the request_key() authorisation for this thread */ - key_get(tsk->request_key_auth); + key_get(tsk->cred->request_key_auth); return 0; @@ -322,8 +322,8 @@ void exit_thread_group_keys(struct signal_struct *tg) */ void exit_keys(struct task_struct *tsk) { - key_put(tsk->thread_keyring); - key_put(tsk->request_key_auth); + key_put(tsk->cred->thread_keyring); + key_put(tsk->cred->request_key_auth); } /* end exit_keys() */ @@ -337,8 +337,8 @@ int exec_keys(struct task_struct *tsk) /* newly exec'd tasks don't get a thread keyring */ task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = NULL; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = NULL; task_unlock(tsk); key_put(old); @@ -373,10 +373,11 @@ int suid_keys(struct task_struct *tsk) void key_fsuid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->uid = tsk->fsuid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->uid = tsk->cred->fsuid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsuid_changed() */ @@ -388,10 +389,11 @@ void key_fsuid_changed(struct task_struct *tsk) void key_fsgid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->gid = tsk->fsgid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->gid = tsk->cred->fsgid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsgid_changed() */ @@ -426,9 +428,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->thread_keyring) { + if (context->cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->thread_keyring, 1), + make_key_ref(context->cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -493,9 +495,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (context->user->session_keyring) { + else if (context->cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->user->session_keyring, 1), + make_key_ref(context->cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -517,20 +519,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->request_key_auth && + if (context->cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->request_key_auth->sem); + down_read(&context->cred->request_key_auth->sem); - if (key_validate(context->request_key_auth) == 0) { - rka = context->request_key_auth->payload.data; + if (key_validate(context->cred->request_key_auth) == 0) { + rka = context->cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -547,7 +549,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); } } @@ -580,15 +582,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - key_ref_t key_ref, skey_ref; + struct cred *cred = t->cred; struct key *key; + key_ref_t key_ref, skey_ref; int ret; key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!t->thread_keyring) { + if (!cred->thread_keyring) { if (!create) goto error; @@ -599,7 +602,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, } } - key = t->thread_keyring; + key = cred->thread_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -628,7 +631,8 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, ret = install_user_keyrings(); if (ret < 0) goto error; - ret = install_session_keyring(t->user->session_keyring); + ret = install_session_keyring( + cred->user->session_keyring); if (ret < 0) goto error; } @@ -641,25 +645,25 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_USER_KEYRING: - if (!t->user->uid_keyring) { + if (!cred->user->uid_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->uid_keyring; + key = cred->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!t->user->session_keyring) { + if (!cred->user->session_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->session_keyring; + key = cred->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -670,7 +674,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = t->request_key_auth; + key = cred->request_key_auth; if (!key) goto error; @@ -679,19 +683,19 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_REQUESTOR_KEYRING: - if (!t->request_key_auth) + if (!cred->request_key_auth) goto error; - down_read(&t->request_key_auth->sem); - if (t->request_key_auth->flags & KEY_FLAG_REVOKED) { + down_read(&cred->request_key_auth->sem); + if (cred->request_key_auth->flags & KEY_FLAG_REVOKED) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = t->request_key_auth->payload.data; + rka = cred->request_key_auth->payload.data; key = rka->dest_keyring; atomic_inc(&key->usage); } - up_read(&t->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -791,7 +795,7 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 8e9d93b4a402..3e9b9eb1dd28 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -104,7 +104,8 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->thread_keyring ? tsk->thread_keyring->serial : 0); + tsk->cred->thread_keyring ? + tsk->cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -117,7 +118,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->user->session_keyring->serial; + sskey = tsk->cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); @@ -232,11 +233,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->jit_keyring) { + switch (tsk->cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->request_key_auth) { - authkey = tsk->request_key_auth; + if (tsk->cred->request_key_auth) { + authkey = tsk->cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -249,7 +250,7 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->thread_keyring); + dest_keyring = key_get(tsk->cred->thread_keyring); if (dest_keyring) break; @@ -268,11 +269,12 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = key_get(tsk->user->session_keyring); + dest_keyring = + key_get(tsk->cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->user->uid_keyring); + dest_keyring = key_get(tsk->cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 1762d44711d5..2125579d5d73 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -164,22 +164,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->request_key_auth) { + if (current->cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(¤t->request_key_auth->sem); + down_read(¤t->cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, - ¤t->request_key_auth->flags)) + ¤t->cred->request_key_auth->flags)) goto auth_key_revoked; - irka = current->request_key_auth->payload.data; + irka = current->cred->request_key_auth->payload.data; rka->context = irka->context; rka->pid = irka->pid; get_task_struct(rka->context); - up_read(¤t->request_key_auth->sem); + up_read(¤t->cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ @@ -214,7 +214,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, return authkey; auth_key_revoked: - up_read(¤t->request_key_auth->sem); + up_read(¤t->cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 64af2d3409ef..cf02490cd1eb 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9f6da154cc82..328308f2882a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -167,21 +167,21 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->security = tsec; + task->cred->security = tsec; return 0; } static void task_free_security(struct task_struct *task) { - struct task_security_struct *tsec = task->security; - task->security = NULL; + struct task_security_struct *tsec = task->cred->security; + task->cred->security = NULL; kfree(tsec); } static int inode_alloc_security(struct inode *inode) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode_security_struct *isec; isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); @@ -215,7 +215,7 @@ static void inode_free_security(struct inode *inode) static int file_alloc_security(struct file *file) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec; fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); @@ -554,7 +554,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) { int rc = 0, i; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct superblock_security_struct *sbsec = sb->s_security; const char *name = sb->s_type->name; struct inode *inode = sbsec->sb->s_root->d_inode; @@ -1353,8 +1353,8 @@ static int task_has_perm(struct task_struct *tsk1, { struct task_security_struct *tsec1, *tsec2; - tsec1 = tsk1->security; - tsec2 = tsk2->security; + tsec1 = tsk1->cred->security; + tsec2 = tsk2->cred->security; return avc_has_perm(tsec1->sid, tsec2->sid, SECCLASS_PROCESS, perms, NULL); } @@ -1374,7 +1374,7 @@ static int task_has_capability(struct task_struct *tsk, u32 av = CAP_TO_MASK(cap); int rc; - tsec = tsk->security; + tsec = tsk->cred->security; AVC_AUDIT_DATA_INIT(&ad, CAP); ad.tsk = tsk; @@ -1405,7 +1405,7 @@ static int task_has_system(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; return avc_has_perm(tsec->sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, perms, NULL); @@ -1426,7 +1426,7 @@ static int inode_has_perm(struct task_struct *tsk, if (unlikely(IS_PRIVATE(inode))) return 0; - tsec = tsk->security; + tsec = tsk->cred->security; isec = inode->i_security; if (!adp) { @@ -1466,7 +1466,7 @@ static int file_has_perm(struct task_struct *tsk, struct file *file, u32 av) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct file_security_struct *fsec = file->f_security; struct inode *inode = file->f_path.dentry->d_inode; struct avc_audit_data ad; @@ -1503,7 +1503,7 @@ static int may_create(struct inode *dir, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -1540,7 +1540,7 @@ static int may_create_key(u32 ksid, { struct task_security_struct *tsec; - tsec = ctx->security; + tsec = ctx->cred->security; return avc_has_perm(tsec->sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); } @@ -1561,7 +1561,7 @@ static int may_link(struct inode *dir, u32 av; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; isec = dentry->d_inode->i_security; @@ -1606,7 +1606,7 @@ static inline int may_rename(struct inode *old_dir, int old_is_dir, new_is_dir; int rc; - tsec = current->security; + tsec = current->cred->security; old_dsec = old_dir->i_security; old_isec = old_dentry->d_inode->i_security; old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); @@ -1659,7 +1659,7 @@ static int superblock_has_perm(struct task_struct *tsk, struct task_security_struct *tsec; struct superblock_security_struct *sbsec; - tsec = tsk->security; + tsec = tsk->cred->security; sbsec = sb->s_security; return avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); @@ -1758,8 +1758,8 @@ static int selinux_ptrace_may_access(struct task_struct *child, return rc; if (mode == PTRACE_MODE_READ) { - struct task_security_struct *tsec = current->security; - struct task_security_struct *csec = child->security; + struct task_security_struct *tsec = current->cred->security; + struct task_security_struct *csec = child->cred->security; return avc_has_perm(tsec->sid, csec->sid, SECCLASS_FILE, FILE__READ, NULL); } @@ -1874,7 +1874,7 @@ static int selinux_sysctl(ctl_table *table, int op) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; rc = selinux_sysctl_get_sid(table, (op == 0001) ? SECCLASS_DIR : SECCLASS_FILE, &tsid); @@ -2025,7 +2025,7 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm) if (bsec->set) return 0; - tsec = current->security; + tsec = current->cred->security; isec = inode->i_security; /* Default to the current task SID. */ @@ -2090,7 +2090,7 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm) static int selinux_bprm_secureexec(struct linux_binprm *bprm) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int atsecure = 0; if (tsec->osid != tsec->sid) { @@ -2214,7 +2214,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) secondary_ops->bprm_apply_creds(bprm, unsafe); - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; sid = bsec->sid; @@ -2243,7 +2243,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) rcu_read_lock(); tracer = tracehook_tracer_task(current); if (likely(tracer != NULL)) { - sec = tracer->security; + sec = tracer->cred->security; ptsid = sec->sid; } rcu_read_unlock(); @@ -2274,7 +2274,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) int rc, i; unsigned long flags; - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; if (bsec->unsafe) { @@ -2521,7 +2521,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, int rc; char *namep = NULL, *context; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -2706,7 +2706,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) static int selinux_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec; @@ -2918,7 +2918,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask) static int selinux_file_permission(struct file *file, int mask) { struct inode *inode = file->f_path.dentry->d_inode; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec = file->f_security; struct inode_security_struct *isec = inode->i_security; @@ -2995,7 +2995,8 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { int rc = 0; - u32 sid = ((struct task_security_struct *)(current->security))->sid; + u32 sid = ((struct task_security_struct *) + (current->cred->security))->sid; if (addr < mmap_min_addr) rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, @@ -3107,7 +3108,7 @@ static int selinux_file_set_fowner(struct file *file) struct task_security_struct *tsec; struct file_security_struct *fsec; - tsec = current->security; + tsec = current->cred->security; fsec = file->f_security; fsec->fown_sid = tsec->sid; @@ -3125,7 +3126,7 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, /* struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - tsec = tsk->security; + tsec = tsk->cred->security; fsec = file->f_security; if (!signum) @@ -3188,12 +3189,12 @@ static int selinux_task_alloc_security(struct task_struct *tsk) struct task_security_struct *tsec1, *tsec2; int rc; - tsec1 = current->security; + tsec1 = current->cred->security; rc = task_alloc_security(tsk); if (rc) return rc; - tsec2 = tsk->security; + tsec2 = tsk->cred->security; tsec2->osid = tsec1->osid; tsec2->sid = tsec1->sid; @@ -3251,7 +3252,7 @@ static int selinux_task_getsid(struct task_struct *p) static void selinux_task_getsecid(struct task_struct *p, u32 *secid) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; *secid = tsec->sid; } @@ -3343,7 +3344,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, perm = PROCESS__SIGNULL; /* null signal; existence test */ else perm = signal_to_av(sig); - tsec = p->security; + tsec = p->cred->security; if (secid) rc = avc_has_perm(secid, tsec->sid, SECCLASS_PROCESS, perm, NULL); else @@ -3375,7 +3376,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) secondary_ops->task_reparent_to_init(p); - tsec = p->security; + tsec = p->cred->security; tsec->osid = tsec->sid; tsec->sid = SECINITSID_KERNEL; return; @@ -3384,7 +3385,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; struct inode_security_struct *isec = inode->i_security; isec->sid = tsec->sid; @@ -3632,7 +3633,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, struct avc_audit_data ad; int err = 0; - tsec = task->security; + tsec = task->cred->security; isec = SOCK_INODE(sock)->i_security; if (isec->sid == SECINITSID_KERNEL) @@ -3656,7 +3657,7 @@ static int selinux_socket_create(int family, int type, if (kern) goto out; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; err = avc_has_perm(tsec->sid, newsid, socket_type_to_security_class(family, type, @@ -3677,7 +3678,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, isec = SOCK_INODE(sock)->i_security; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; isec->sclass = socket_type_to_security_class(family, type, protocol); isec->sid = kern ? SECINITSID_KERNEL : newsid; @@ -3723,7 +3724,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in struct sock *sk = sock->sk; u32 sid, node_perm; - tsec = current->security; + tsec = current->cred->security; isec = SOCK_INODE(sock)->i_security; if (family == PF_INET) { @@ -4764,7 +4765,7 @@ static int ipc_alloc_security(struct task_struct *task, struct kern_ipc_perm *perm, u16 sclass) { - struct task_security_struct *tsec = task->security; + struct task_security_struct *tsec = task->cred->security; struct ipc_security_struct *isec; isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); @@ -4814,7 +4815,7 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = ipc_perms->security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4845,7 +4846,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4871,7 +4872,7 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4917,7 +4918,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4965,7 +4966,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = target->security; + tsec = target->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4992,7 +4993,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5018,7 +5019,7 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5091,7 +5092,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5117,7 +5118,7 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5224,7 +5225,7 @@ static int selinux_getprocattr(struct task_struct *p, return error; } - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "current")) sid = tsec->sid; @@ -5308,7 +5309,7 @@ static int selinux_setprocattr(struct task_struct *p, operation. See selinux_bprm_set_security for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. */ - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "exec")) tsec->exec_sid = sid; else if (!strcmp(name, "fscreate")) @@ -5361,7 +5362,8 @@ boundary_ok: rcu_read_lock(); tracer = tracehook_tracer_task(p); if (tracer != NULL) { - struct task_security_struct *ptsec = tracer->security; + struct task_security_struct *ptsec = + tracer->cred->security; u32 ptsid = ptsec->sid; rcu_read_unlock(); error = avc_has_perm_noaudit(ptsid, sid, @@ -5405,7 +5407,7 @@ static void selinux_release_secctx(char *secdata, u32 seclen) static int selinux_key_alloc(struct key *k, struct task_struct *tsk, unsigned long flags) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); @@ -5439,7 +5441,7 @@ static int selinux_key_permission(key_ref_t key_ref, key = key_ref_to_ptr(key_ref); - tsec = ctx->security; + tsec = ctx->cred->security; ksec = key->security; /* if no specific permissions are requested, we skip the @@ -5683,7 +5685,7 @@ static __init int selinux_init(void) /* Set the security state for the initial task. */ if (task_alloc_security(current)) panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->security; + tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; sel_inode_cache = kmem_cache_create("selinux_inode_security", diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 69c9dccc8cf0..10715d1330b9 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -97,7 +97,7 @@ static int task_has_security(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; if (!tsec) return -EACCES; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 8f17f542a116..d7db76617b0e 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 79ff21ed4c3b..b6dd4fc0fb0b 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = smk_access(current->security, obj_label, mode); + rc = smk_access(current->cred->security, obj_label, mode); if (rc == 0) return 0; @@ -173,7 +173,7 @@ int smk_curacc(char *obj_label, u32 mode) * only one that gets privilege and current does not * have that label. */ - if (smack_onlycap != NULL && smack_onlycap != current->security) + if (smack_onlycap != NULL && smack_onlycap != current->cred->security) return rc; if (capable(CAP_MAC_OVERRIDE)) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 6e2dc0bab70d..791da238d049 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -102,7 +102,8 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->security, ctp->security, MAY_READWRITE); + rc = smk_access(current->cred->security, ctp->cred->security, + MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -124,7 +125,8 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->security, current->security, MAY_READWRITE); + rc = smk_access(ptp->cred->security, current->cred->security, + MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -141,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->security; + char *sp = current->cred->security; rc = cap_syslog(type); if (rc != 0) @@ -373,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->security); + inode->i_security = new_inode_smack(current->cred->security); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -818,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -916,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -941,7 +943,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, * struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - rc = smk_access(file->f_security, tsk->security, MAY_WRITE); + rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -984,7 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_task_alloc_security(struct task_struct *tsk) { - tsk->security = current->security; + tsk->cred->security = current->cred->security; return 0; } @@ -999,7 +1001,7 @@ static int smack_task_alloc_security(struct task_struct *tsk) */ static void smack_task_free_security(struct task_struct *task) { - task->security = NULL; + task->cred->security = NULL; } /** @@ -1011,7 +1013,7 @@ static void smack_task_free_security(struct task_struct *task) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1022,7 +1024,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1033,7 +1035,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1045,7 +1047,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->security); + *secid = smack_to_secid(p->cred->security); } /** @@ -1061,7 +1063,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1078,7 +1080,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1090,7 +1092,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1108,7 +1110,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1120,7 +1122,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1131,7 +1133,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1154,13 +1156,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); } /** @@ -1173,7 +1175,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->security, p->security, MAY_WRITE); + rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); if (rc == 0) return 0; @@ -1204,7 +1206,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->security; + isp->smk_inode = p->cred->security; } /* @@ -1223,7 +1225,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->security; + char *csp = current->cred->security; struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1448,7 +1450,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->security; + msg->security = current->cred->security; return 0; } @@ -1484,7 +1486,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1593,7 +1595,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1697,7 +1699,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->security; + kisp->security = current->cred->security; return 0; } @@ -1852,7 +1854,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->security; + char *csp = current->cred->security; char *fetched; char *final; struct dentry *dp; @@ -2009,7 +2011,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->security, GFP_KERNEL); + cp = kstrdup(p->cred->security, GFP_KERNEL); if (cp == NULL) return -ENOMEM; @@ -2055,7 +2057,7 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->security = newsmack; + p->cred->security = newsmack; return size; } @@ -2288,8 +2290,8 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->security; - ssp->smk_out = current->security; + ssp->smk_in = current->cred->security; + ssp->smk_out = current->cred->security; ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); @@ -2362,7 +2364,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, static int smack_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) { - key->security = tsk->security; + key->security = tsk->cred->security; return 0; } @@ -2403,10 +2405,11 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->security == NULL) + if (context->cred->security == NULL) return -EACCES; - return smk_access(context->security, keyp->security, MAY_READWRITE); + return smk_access(context->cred->security, keyp->security, + MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2726,7 +2729,7 @@ static __init int smack_init(void) /* * Set the security state for the initial task. */ - current->security = &smack_known_floor.smk_known; + current->cred->security = &smack_known_floor.smk_known; /* * Initialize locks diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c21d8c8bf0c7..c5ca279e0506 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); @@ -843,7 +843,7 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char in[SMK_LABELLEN]; - char *sp = current->security; + char *sp = current->cred->security; if (!capable(CAP_MAC_ADMIN)) return -EPERM; -- cgit v1.2.3 From f1752eec6145c97163dbce62d17cf5d928e28a27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:17 +1100 Subject: CRED: Detach the credentials from task_struct Detach the credentials from task_struct, duplicating them in copy_process() and releasing them in __put_task_struct(). Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 29 ++++++++++++++ include/linux/init_task.h | 16 +------- include/linux/sched.h | 1 - include/linux/security.h | 26 ++++++------- kernel/Makefile | 2 +- kernel/cred.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 24 +++--------- security/capability.c | 8 ++-- security/security.c | 8 ++-- security/selinux/hooks.c | 32 ++++++++-------- security/smack/smack_lsm.c | 20 +++++----- 11 files changed, 179 insertions(+), 83 deletions(-) create mode 100644 kernel/cred.c (limited to 'kernel') diff --git a/include/linux/cred.h b/include/linux/cred.h index 3e65587a72e5..a7a686074cb0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -158,4 +158,33 @@ do { \ *(_gid) = current->cred->fsgid; \ } while(0) +extern void __put_cred(struct cred *); +extern int copy_creds(struct task_struct *, unsigned long); + +/** + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. + */ +static inline struct cred *get_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} + +/** + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. + */ +static inline void put_cred(struct cred *cred) +{ + if (atomic_dec_and_test(&(cred)->usage)) + __put_cred(cred); +} + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9de41ccd67b5..5e24c54b6dfd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -115,19 +115,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#define INIT_CRED(p) \ -{ \ - .usage = ATOMIC_INIT(3), \ - .securebits = SECUREBITS_DEFAULT, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .user = INIT_USER, \ - .group_info = &init_groups, \ - .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ -} - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,8 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .__temp_cred = INIT_CRED(tsk.__temp_cred), \ - .cred = &tsk.__temp_cred, \ + .cred = &init_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8b92502354d..740cf946c8cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ struct cred *cred; /* actual/objective task credentials */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/security.h b/include/linux/security.h index 9f305d4a31a7..9239cc11eb9c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -593,15 +593,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @task_alloc_security: - * @p contains the task_struct for child process. - * Allocate and attach a security structure to the p->security field. The - * security field is initialized to NULL when the task structure is + * @cred_alloc_security: + * @cred contains the cred struct for child process. + * Allocate and attach a security structure to the cred->security field. + * The security field is initialized to NULL when the task structure is * allocated. * Return 0 if operation was successful. - * @task_free_security: - * @p contains the task_struct for process. - * Deallocate and clear the p->security field. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1405,8 +1405,8 @@ struct security_operations { int (*dentry_open) (struct file *file); int (*task_create) (unsigned long clone_flags); - int (*task_alloc_security) (struct task_struct *p); - void (*task_free_security) (struct task_struct *p); + int (*cred_alloc_security) (struct cred *cred); + void (*cred_free) (struct cred *cred); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , uid_t old_euid, uid_t old_suid, int flags); @@ -1660,8 +1660,8 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file); int security_task_create(unsigned long clone_flags); -int security_task_alloc(struct task_struct *p); -void security_task_free(struct task_struct *p); +int security_cred_alloc(struct cred *cred); +void security_cred_free(struct cred *cred); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); @@ -2181,12 +2181,12 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_task_alloc(struct task_struct *p) +static inline int security_cred_alloc(struct cred *cred) { return 0; } -static inline void security_task_free(struct task_struct *p) +static inline void security_cred_free(struct cred *cred) { } static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, diff --git a/kernel/Makefile b/kernel/Makefile index 9a3ec66a9d84..5a6a612c302d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o CFLAGS_REMOVE_sched.o = -mno-spe diff --git a/kernel/cred.c b/kernel/cred.c new file mode 100644 index 000000000000..833244a7cb05 --- /dev/null +++ b/kernel/cred.c @@ -0,0 +1,96 @@ +/* Task credentials management + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * The initial credentials for the initial task + */ +struct cred init_cred = { + .usage = ATOMIC_INIT(3), + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_INIT_INH_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_INIT_EFF_SET, + .cap_bset = CAP_INIT_BSET, + .user = INIT_USER, + .group_info = &init_groups, +}; + +/* + * The RCU callback to actually dispose of a set of credentials + */ +static void put_cred_rcu(struct rcu_head *rcu) +{ + struct cred *cred = container_of(rcu, struct cred, rcu); + + BUG_ON(atomic_read(&cred->usage) != 0); + + key_put(cred->thread_keyring); + key_put(cred->request_key_auth); + put_group_info(cred->group_info); + free_uid(cred->user); + security_cred_free(cred); + kfree(cred); +} + +/** + * __put_cred - Destroy a set of credentials + * @sec: The record to release + * + * Destroy a set of credentials on which no references remain. + */ +void __put_cred(struct cred *cred) +{ + call_rcu(&cred->rcu, put_cred_rcu); +} +EXPORT_SYMBOL(__put_cred); + +/* + * Copy credentials for the new process created by fork() + */ +int copy_creds(struct task_struct *p, unsigned long clone_flags) +{ + struct cred *pcred; + int ret; + + pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL); + if (!pcred) + return -ENOMEM; + +#ifdef CONFIG_SECURITY + pcred->security = NULL; +#endif + + ret = security_cred_alloc(pcred); + if (ret < 0) { + kfree(pcred); + return ret; + } + + atomic_set(&pcred->usage, 1); + get_group_info(pcred->group_info); + get_uid(pcred->user); + key_get(pcred->thread_keyring); + key_get(pcred->request_key_auth); + + atomic_inc(&pcred->user->processes); + + /* RCU assignment is unneeded here as no-one can have accessed this + * pointer yet, barring us */ + p->cred = pcred; + return 0; +} diff --git a/kernel/fork.c b/kernel/fork.c index 81fdc7733908..c932e283ddfc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -146,9 +146,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); - security_task_free(tsk); - free_uid(tsk->__temp_cred.user); - put_group_info(tsk->__temp_cred.group_info); + put_cred(tsk->cred); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,7 +967,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif - p->cred = &p->__temp_cred; retval = -EAGAIN; if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { @@ -978,9 +975,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_free; } - atomic_inc(&p->cred->user->__count); - atomic_inc(&p->cred->user->processes); - get_group_info(p->cred->group_info); + retval = copy_creds(p, clone_flags); + if (retval < 0) + goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1032,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); -#ifdef CONFIG_SECURITY - p->cred->security = NULL; -#endif p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1082,10 +1076,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - if ((retval = security_task_alloc(p))) - goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) - goto bad_fork_cleanup_security; + goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; @@ -1284,8 +1276,6 @@ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); -bad_fork_cleanup_security: - security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); @@ -1298,9 +1288,7 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->cred->group_info); - atomic_dec(&p->cred->user->processes); - free_uid(p->cred->user); + put_cred(p->cred); bad_fork_free: free_task(p); fork_out: diff --git a/security/capability.c b/security/capability.c index 245874819036..6c4b5137ca7b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -340,12 +340,12 @@ static int cap_task_create(unsigned long clone_flags) return 0; } -static int cap_task_alloc_security(struct task_struct *p) +static int cap_cred_alloc_security(struct cred *cred) { return 0; } -static void cap_task_free_security(struct task_struct *p) +static void cap_cred_free(struct cred *cred) { } @@ -890,8 +890,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_receive); set_to_cap_if_null(ops, dentry_open); set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, task_alloc_security); - set_to_cap_if_null(ops, task_free_security); + set_to_cap_if_null(ops, cred_alloc_security); + set_to_cap_if_null(ops, cred_free); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_post_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index 81c956a12300..d058f7d5b10a 100644 --- a/security/security.c +++ b/security/security.c @@ -616,14 +616,14 @@ int security_task_create(unsigned long clone_flags) return security_ops->task_create(clone_flags); } -int security_task_alloc(struct task_struct *p) +int security_cred_alloc(struct cred *cred) { - return security_ops->task_alloc_security(p); + return security_ops->cred_alloc_security(cred); } -void security_task_free(struct task_struct *p) +void security_cred_free(struct cred *cred) { - security_ops->task_free_security(p); + security_ops->cred_free(cred); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 328308f2882a..658435dce37c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -158,7 +158,7 @@ static int selinux_secmark_enabled(void) /* Allocate and free functions for each kind of security blob. */ -static int task_alloc_security(struct task_struct *task) +static int cred_alloc_security(struct cred *cred) { struct task_security_struct *tsec; @@ -167,18 +167,11 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->cred->security = tsec; + cred->security = tsec; return 0; } -static void task_free_security(struct task_struct *task) -{ - struct task_security_struct *tsec = task->cred->security; - task->cred->security = NULL; - kfree(tsec); -} - static int inode_alloc_security(struct inode *inode) { struct task_security_struct *tsec = current->cred->security; @@ -3184,17 +3177,17 @@ static int selinux_task_create(unsigned long clone_flags) return task_has_perm(current, current, PROCESS__FORK); } -static int selinux_task_alloc_security(struct task_struct *tsk) +static int selinux_cred_alloc_security(struct cred *cred) { struct task_security_struct *tsec1, *tsec2; int rc; tsec1 = current->cred->security; - rc = task_alloc_security(tsk); + rc = cred_alloc_security(cred); if (rc) return rc; - tsec2 = tsk->cred->security; + tsec2 = cred->security; tsec2->osid = tsec1->osid; tsec2->sid = tsec1->sid; @@ -3208,9 +3201,14 @@ static int selinux_task_alloc_security(struct task_struct *tsk) return 0; } -static void selinux_task_free_security(struct task_struct *tsk) +/* + * detach and free the LSM part of a set of credentials + */ +static void selinux_cred_free(struct cred *cred) { - task_free_security(tsk); + struct task_security_struct *tsec = cred->security; + cred->security = NULL; + kfree(tsec); } static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -5552,8 +5550,8 @@ static struct security_operations selinux_ops = { .dentry_open = selinux_dentry_open, .task_create = selinux_task_create, - .task_alloc_security = selinux_task_alloc_security, - .task_free_security = selinux_task_free_security, + .cred_alloc_security = selinux_cred_alloc_security, + .cred_free = selinux_cred_free, .task_setuid = selinux_task_setuid, .task_post_setuid = selinux_task_post_setuid, .task_setgid = selinux_task_setgid, @@ -5683,7 +5681,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); /* Set the security state for the initial task. */ - if (task_alloc_security(current)) + if (cred_alloc_security(current->cred)) panic("SELinux: Failed to initialize initial task.\n"); tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 791da238d049..cc837314fb0e 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -975,8 +975,8 @@ static int smack_file_receive(struct file *file) */ /** - * smack_task_alloc_security - "allocate" a task blob - * @tsk: the task in need of a blob + * smack_cred_alloc_security - "allocate" a task cred blob + * @cred: the task creds in need of a blob * * Smack isn't using copies of blobs. Everyone * points to an immutable list. No alloc required. @@ -984,24 +984,24 @@ static int smack_file_receive(struct file *file) * * Always returns 0 */ -static int smack_task_alloc_security(struct task_struct *tsk) +static int smack_cred_alloc_security(struct cred *cred) { - tsk->cred->security = current->cred->security; + cred->security = current->cred->security; return 0; } /** - * smack_task_free_security - "free" a task blob - * @task: the task with the blob + * smack_cred_free - "free" task-level security credentials + * @cred: the credentials in question * * Smack isn't using copies of blobs. Everyone * points to an immutable list. The blobs never go away. * There is no leak here. */ -static void smack_task_free_security(struct task_struct *task) +static void smack_cred_free(struct cred *cred) { - task->cred->security = NULL; + cred->security = NULL; } /** @@ -2630,8 +2630,8 @@ struct security_operations smack_ops = { .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, - .task_alloc_security = smack_task_alloc_security, - .task_free_security = smack_task_free_security, + .cred_alloc_security = smack_cred_alloc_security, + .cred_free = smack_cred_free, .task_post_setuid = cap_task_post_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, -- cgit v1.2.3 From 86a264abe542cfececb4df129bc45a0338d8cdb9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:18 +1100 Subject: CRED: Wrap current->cred and a few other accessors Wrap current->cred and a few other accessors to hide their actual implementation. Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/ia64/ia32/sys_ia32.c | 7 +- drivers/net/tun.c | 8 +- drivers/usb/core/devio.c | 10 ++- fs/binfmt_elf.c | 10 +-- fs/binfmt_elf_fdpic.c | 9 +- fs/exec.c | 5 +- fs/fcntl.c | 3 +- fs/file_table.c | 7 +- fs/hugetlbfs/inode.c | 5 +- fs/ioprio.c | 4 +- fs/smbfs/dir.c | 3 +- include/linux/cred.h | 187 ++++++++++++++++++++++++++++++++---------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/sys.c | 59 +++++++------ kernel/uid16.c | 31 +++---- net/core/scm.c | 2 +- net/sunrpc/auth.c | 14 ++-- security/commoncap.c | 2 +- security/keys/process_keys.c | 2 +- security/keys/request_key.c | 11 +-- security/selinux/exports.c | 8 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 2 +- security/smack/smack_lsm.c | 26 +++--- security/smack/smackfs.c | 4 +- 27 files changed, 271 insertions(+), 162 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 2445a9d3488e..16ef61a91d95 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1767,25 +1767,24 @@ groups16_from_user(struct group_info *group_info, short __user *grouplist) asmlinkage long sys32_getgroups16 (int gidsetsize, short __user *grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); return i; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b14e2025e221..55dc70c6b4db 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -702,6 +702,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) struct tun_net *tn; struct tun_struct *tun; struct net_device *dev; + const struct cred *cred = current_cred(); int err; tn = net_generic(net, tun_net_id); @@ -712,11 +713,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) /* Check permissions */ if (((tun->owner != -1 && - current_euid() != tun->owner) || + cred->euid != tun->owner) || (tun->group != -1 && - current_egid() != tun->group)) && - !capable(CAP_NET_ADMIN)) + cred->egid != tun->group)) && + !capable(CAP_NET_ADMIN)) { return -EPERM; + } } else if (__dev_get_by_name(net, ifr->ifr_name)) return -EINVAL; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 1aadb9387027..aa79280df15d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -574,6 +574,7 @@ static int usbdev_open(struct inode *inode, struct file *file) { struct usb_device *dev = NULL; struct dev_state *ps; + const struct cred *cred = current_cred(); int ret; lock_kernel(); @@ -617,8 +618,8 @@ static int usbdev_open(struct inode *inode, struct file *file) init_waitqueue_head(&ps->wait); ps->discsignr = 0; ps->disc_pid = get_pid(task_pid(current)); - ps->disc_uid = current_uid(); - ps->disc_euid = current_euid(); + ps->disc_uid = cred->uid; + ps->disc_euid = cred->euid; ps->disccontext = NULL; ps->ifclaimed = 0; security_task_getsecid(current, &ps->secid); @@ -967,6 +968,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, struct usb_host_endpoint *ep; struct async *as; struct usb_ctrlrequest *dr = NULL; + const struct cred *cred = current_cred(); unsigned int u, totlen, isofrmlen; int ret, ifnum = -1; int is_in; @@ -1174,8 +1176,8 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, as->signr = uurb->signr; as->ifnum = ifnum; as->pid = get_pid(task_pid(current)); - as->uid = current_uid(); - as->euid = current_euid(); + as->uid = cred->uid; + as->euid = cred->euid; security_task_getsecid(current, &as->secid); if (!is_in) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7a52477ce493..0e6655613169 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -157,7 +157,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, int items; elf_addr_t *elf_info; int ei_index = 0; - struct task_struct *tsk = current; + const struct cred *cred = current_cred(); struct vm_area_struct *vma; /* @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->cred->uid); - NEW_AUX_ENT(AT_EUID, tsk->cred->euid); - NEW_AUX_ENT(AT_GID, tsk->cred->gid); - NEW_AUX_ENT(AT_EGID, tsk->cred->egid); + NEW_AUX_ENT(AT_UID, cred->uid); + NEW_AUX_ENT(AT_EUID, cred->euid); + NEW_AUX_ENT(AT_GID, cred->gid); + NEW_AUX_ENT(AT_EGID, cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9f67054c2c4e..1f6e8c023b4c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -475,6 +475,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, struct elf_fdpic_params *exec_params, struct elf_fdpic_params *interp_params) { + const struct cred *cred = current_cred(); unsigned long sp, csp, nitems; elf_caddr_t __user *argv, *envp; size_t platform_len = 0, len; @@ -623,10 +624,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); + NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); diff --git a/fs/exec.c b/fs/exec.c index 31149e430a89..a5330e1a2216 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1388,6 +1388,7 @@ EXPORT_SYMBOL(set_binfmt); */ static int format_corename(char *corename, long signr) { + const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); char *out_ptr = corename; @@ -1424,7 +1425,7 @@ static int format_corename(char *corename, long signr) /* uid */ case 'u': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current_uid()); + "%d", cred->uid); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1432,7 +1433,7 @@ static int format_corename(char *corename, long signr) /* gid */ case 'g': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current_gid()); + "%d", cred->gid); if (rc > out_end - out_ptr) goto out; out_ptr += rc; diff --git a/fs/fcntl.c b/fs/fcntl.c index 63964d863ad6..c594cc0e40fb 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -205,13 +205,14 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { + const struct cred *cred = current_cred(); int err; err = security_file_set_fowner(filp); if (err) return err; - f_modown(filp, pid, type, current_uid(), current_euid(), force); + f_modown(filp, pid, type, cred->uid, cred->euid, force); return 0; } EXPORT_SYMBOL(__f_setown); diff --git a/fs/file_table.c b/fs/file_table.c index 3152b53cfab0..bc4563fe791d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -94,7 +94,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp, */ struct file *get_empty_filp(void) { - struct task_struct *tsk; + const struct cred *cred = current_cred(); static int old_max; struct file * f; @@ -118,12 +118,11 @@ struct file *get_empty_filp(void) if (security_file_alloc(f)) goto fail_sec; - tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->cred->fsuid; - f->f_gid = tsk->cred->fsgid; + f->f_uid = cred->fsuid; + f->f_gid = cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 870a721b8bd2..7d479ce3aceb 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -951,6 +951,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; + struct user_struct *user = current_user(); if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); @@ -958,7 +959,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->cred->user)) + if (!user_shm_lock(size, user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +999,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->cred->user); + user_shm_unlock(size, user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c index bb5210af77c2..5112554fd210 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->cred->user; + user = current_user(); else user = find_user(who); @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->cred->user; + user = current_user(); else user = find_user(who); diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 9e9bb0db4f6d..e7ddd0328ddc 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -667,8 +667,7 @@ smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID; attr.ia_mode = mode; - attr.ia_uid = current_euid(); - attr.ia_gid = current_egid(); + current_euid_egid(&attr.ia_uid, &attr.ia_gid); if (!new_valid_dev(dev)) return -EINVAL; diff --git a/include/linux/cred.h b/include/linux/cred.h index a7a686074cb0..4221ec6000c1 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -37,15 +37,16 @@ struct group_info { * get_group_info - Get a reference to a group info structure * @group_info: The group info to reference * - * This must be called with the owning task locked (via task_lock()) when task - * != current. The reason being that the vast majority of callers are looking - * at current->group_info, which can not be changed except by the current task. - * Changing current->group_info requires the task lock, too. + * This gets a reference to a set of supplementary groups. + * + * If the caller is accessing a task's credentials, they must hold the RCU read + * lock when reading. */ -#define get_group_info(group_info) \ -do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) +static inline struct group_info *get_group_info(struct group_info *gi) +{ + atomic_inc(&gi->usage); + return gi; +} /** * put_group_info - Release a reference to a group info structure @@ -61,7 +62,7 @@ extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); -extern int groups_search(struct group_info *, gid_t); +extern int groups_search(const struct group_info *, gid_t); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ @@ -123,41 +124,6 @@ struct cred { spinlock_t lock; /* lock for pointer changes */ }; -#define get_current_user() (get_uid(current->cred->user)) - -#define task_uid(task) ((task)->cred->uid) -#define task_gid(task) ((task)->cred->gid) -#define task_euid(task) ((task)->cred->euid) -#define task_egid(task) ((task)->cred->egid) - -#define current_uid() (current->cred->uid) -#define current_gid() (current->cred->gid) -#define current_euid() (current->cred->euid) -#define current_egid() (current->cred->egid) -#define current_suid() (current->cred->suid) -#define current_sgid() (current->cred->sgid) -#define current_fsuid() (current->cred->fsuid) -#define current_fsgid() (current->cred->fsgid) -#define current_cap() (current->cred->cap_effective) - -#define current_uid_gid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->uid; \ - *(_gid) = current->cred->gid; \ -} while(0) - -#define current_euid_egid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->euid; \ - *(_gid) = current->cred->egid; \ -} while(0) - -#define current_fsuid_fsgid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->fsuid; \ - *(_gid) = current->cred->fsgid; \ -} while(0) - extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); @@ -187,4 +153,137 @@ static inline void put_cred(struct cred *cred) __put_cred(cred); } +/** + * current_cred - Access the current task's credentials + * + * Access the credentials of the current task. + */ +#define current_cred() \ + (current->cred) + +/** + * __task_cred - Access another task's credentials + * @task: The task to query + * + * Access the credentials of another task. The caller must hold the + * RCU readlock. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define __task_cred(task) \ + ((const struct cred *)(rcu_dereference((task)->cred))) + +/** + * get_task_cred - Get another task's credentials + * @task: The task to query + * + * Get the credentials of a task, pinning them so that they can't go away. + * Accessing a task's credentials directly is not permitted. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define get_task_cred(task) \ +({ \ + struct cred *__cred; \ + rcu_read_lock(); \ + __cred = (struct cred *) __task_cred((task)); \ + get_cred(__cred); \ + rcu_read_unlock(); \ + __cred; \ +}) + +/** + * get_current_cred - Get the current task's credentials + * + * Get the credentials of the current task, pinning them so that they can't go + * away. Accessing the current task's credentials directly is not permitted. + */ +#define get_current_cred() \ + (get_cred(current_cred())) + +/** + * get_current_user - Get the current task's user_struct + * + * Get the user record of the current task, pinning it so that it can't go + * away. + */ +#define get_current_user() \ +({ \ + struct user_struct *__u; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __u = get_uid(__cred->user); \ + __u; \ +}) + +/** + * get_current_groups - Get the current task's supplementary group list + * + * Get the supplementary group list of the current task, pinning it so that it + * can't go away. + */ +#define get_current_groups() \ +({ \ + struct group_info *__groups; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __groups = get_group_info(__cred->group_info); \ + __groups; \ +}) + +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(task->cred->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ +}) + +#define task_uid(task) (task_cred_xxx((task), uid)) +#define task_euid(task) (task_cred_xxx((task), euid)) + +#define current_cred_xxx(xxx) \ +({ \ + current->cred->xxx; \ +}) + +#define current_uid() (current_cred_xxx(uid)) +#define current_gid() (current_cred_xxx(gid)) +#define current_euid() (current_cred_xxx(euid)) +#define current_egid() (current_cred_xxx(egid)) +#define current_suid() (current_cred_xxx(suid)) +#define current_sgid() (current_cred_xxx(sgid)) +#define current_fsuid() (current_cred_xxx(fsuid)) +#define current_fsgid() (current_cred_xxx(fsgid)) +#define current_cap() (current_cred_xxx(cap_effective)) +#define current_user() (current_cred_xxx(user)) +#define current_security() (current_cred_xxx(security)) + +#define current_uid_gid(_uid, _gid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_uid) = __cred->uid; \ + *(_gid) = __cred->gid; \ +} while(0) + +#define current_euid_egid(_euid, _egid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_euid) = __cred->euid; \ + *(_egid) = __cred->egid; \ +} while(0) + +#define current_fsuid_fsgid(_fsuid, _fsgid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_fsuid) = __cred->fsuid; \ + *(_fsgid) = __cred->fsgid; \ +} while(0) + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 6d389491bfa2..d2c5ed845bcc 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->cred->securebits) +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e1885b494bac..1151881ccb9a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -112,6 +112,7 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) static struct inode *mqueue_get_inode(struct super_block *sb, int mode, struct mq_attr *attr) { + struct user_struct *u = current_user(); struct inode *inode; inode = new_inode(sb); @@ -126,7 +127,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 264a9d33c5dd..38a055758a9b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->cred->user; + shp->mlock_user = current_user(); } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct *user = current->cred->user; + struct user_struct *user = current_user(); if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git a/kernel/sys.c b/kernel/sys.c index 5d81f07c0150..c4d6b59553e9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,6 +143,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) { struct task_struct *g, *p; struct user_struct *user; + const struct cred *cred = current_cred(); int error = -EINVAL; struct pid *pgrp; @@ -176,18 +177,18 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->cred->user; + user = cred->user; if (!who) - who = current_uid(); - else - if (who != current_uid() && !(user = find_user(who))) - goto out_unlock; /* No processes for this user */ + who = cred->uid; + else if ((who != cred->uid) && + !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->cred->uid == who) + if (__task_cred(p)->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); - if (who != current_uid()) + if (who != cred->uid) free_uid(user); /* For find_user() */ break; } @@ -207,6 +208,7 @@ asmlinkage long sys_getpriority(int which, int who) { struct task_struct *g, *p; struct user_struct *user; + const struct cred *cred = current_cred(); long niceval, retval = -ESRCH; struct pid *pgrp; @@ -238,21 +240,21 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->cred->user; + user = (struct user_struct *) cred->user; if (!who) - who = current_uid(); - else - if (who != current_uid() && !(user = find_user(who))) - goto out_unlock; /* No processes for this user */ + who = cred->uid; + else if ((who != cred->uid) && + !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->cred->uid == who) { + if (__task_cred(p)->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; } while_each_thread(g, p); - if (who != current_uid()) + if (who != cred->uid) free_uid(user); /* for find_user() */ break; } @@ -743,11 +745,11 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(cred->uid, ruid)) && - !(retval = put_user(cred->euid, euid))) + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) retval = put_user(cred->suid, suid); return retval; @@ -796,11 +798,11 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(cred->gid, rgid)) && - !(retval = put_user(cred->egid, egid))) + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) retval = put_user(cred->sgid, sgid); return retval; @@ -1199,7 +1201,7 @@ static void groups_sort(struct group_info *group_info) } /* a simple bsearch */ -int groups_search(struct group_info *group_info, gid_t grp) +int groups_search(const struct group_info *group_info, gid_t grp) { unsigned int left, right; @@ -1268,13 +1270,8 @@ EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { - struct cred *cred = current->cred; - int i = 0; - - /* - * SMP: Nobody else can change our grouplist. Thus we are - * safe. - */ + const struct cred *cred = current_cred(); + int i; if (gidsetsize < 0) return -EINVAL; @@ -1330,8 +1327,9 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval = 1; + if (grp != cred->fsgid) retval = groups_search(cred->group_info, grp); return retval; @@ -1341,8 +1339,9 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval = 1; + if (grp != cred->egid) retval = groups_search(cred->group_info, grp); return retval; diff --git a/kernel/uid16.c b/kernel/uid16.c index 71f07fc39fea..2460c3199b5a 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -84,11 +84,12 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) { + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && - !(retval = put_user(high2lowuid(current->cred->euid), euid))) - retval = put_user(high2lowuid(current->cred->suid), suid); + if (!(retval = put_user(high2lowuid(cred->uid), ruid)) && + !(retval = put_user(high2lowuid(cred->euid), euid))) + retval = put_user(high2lowuid(cred->suid), suid); return retval; } @@ -104,11 +105,12 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) { + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && - !(retval = put_user(high2lowgid(current->cred->egid), egid))) - retval = put_user(high2lowgid(current->cred->sgid), sgid); + if (!(retval = put_user(high2lowgid(cred->gid), rgid)) && + !(retval = put_user(high2lowgid(cred->egid), egid))) + retval = put_user(high2lowgid(cred->sgid), sgid); return retval; } @@ -161,25 +163,24 @@ static int groups16_from_user(struct group_info *group_info, asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) { - int i = 0; + const struct cred *cred = current_cred(); + int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); return i; } @@ -210,20 +211,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->cred->uid); + return high2lowuid(current_uid()); } asmlinkage long sys_geteuid16(void) { - return high2lowuid(current->cred->euid); + return high2lowuid(current_euid()); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->cred->gid); + return high2lowgid(current_gid()); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->cred->egid); + return high2lowgid(current_egid()); } diff --git a/net/core/scm.c b/net/core/scm.c index c28ca32a7d93..f73c44b17dda 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,7 +44,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && ((creds->uid == cred->uid || creds->uid == cred->euid || diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index c79543212602..0443f8349458 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -350,16 +350,18 @@ EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int flags) { - struct auth_cred acred = { - .uid = current_fsuid(), - .gid = current_fsgid(), - .group_info = current->cred->group_info, - }; + struct auth_cred acred; struct rpc_cred *ret; + const struct cred *cred = current_cred(); dprintk("RPC: looking up %s cred\n", auth->au_ops->au_name); - get_group_info(acred.group_info); + + memset(&acred, 0, sizeof(acred)); + acred.uid = cred->fsuid; + acred.gid = cred->fsgid; + acred.group_info = get_group_info(((struct cred *)cred)->group_info); + ret = auth->au_ops->lookup_cred(auth, &acred, flags); put_group_info(acred.group_info); return ret; diff --git a/security/commoncap.c b/security/commoncap.c index fa61679f8c73..61307f590003 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -641,7 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { - struct cred *cred = current->cred; + struct cred *cred = current_cred(); long error = 0; switch (option) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b0904cdda2e7..ce8ac6073d57 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -582,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - struct cred *cred = t->cred; + struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref, skey_ref; int ret; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3e9b9eb1dd28..0488b0af5bd6 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -67,6 +67,7 @@ static int call_sbin_request_key(struct key_construction *cons, void *aux) { struct task_struct *tsk = current; + const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = cons->key, *authkey = cons->authkey, *keyring; char *argv[9], *envp[3], uid_str[12], gid_str[12]; @@ -96,16 +97,16 @@ static int call_sbin_request_key(struct key_construction *cons, goto error_link; /* record the UID and GID */ - sprintf(uid_str, "%d", current_fsuid()); - sprintf(gid_str, "%d", current_fsgid()); + sprintf(uid_str, "%d", cred->fsuid); + sprintf(gid_str, "%d", cred->fsgid); /* we say which key is under construction */ sprintf(key_str, "%d", key->serial); /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->cred->thread_keyring ? - tsk->cred->thread_keyring->serial : 0); + cred->thread_keyring ? + cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -118,7 +119,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->cred->user->session_keyring->serial; + sskey = cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index cf02490cd1eb..c73aeaa008e8 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,9 +39,13 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *__tsec; + u32 tsid; - return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, + __tsec = current_security(); + tsid = __tsec->sid; + + return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); } return 0; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d7db76617b0e..c0eb72013d67 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index b6dd4fc0fb0b..247cec3b5a43 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = smk_access(current->cred->security, obj_label, mode); + rc = smk_access(current_security(), obj_label, mode); if (rc == 0) return 0; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index cc837314fb0e..e8a4fcb1ad04 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -143,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->cred->security; + char *sp = current_security(); rc = cap_syslog(type); if (rc != 0) @@ -375,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->cred->security); + inode->i_security = new_inode_smack(current_security()); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -820,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security = current->cred->security; + file->f_security = current_security(); return 0; } @@ -918,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->cred->security; + file->f_security = current_security(); return 0; } @@ -986,8 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_cred_alloc_security(struct cred *cred) { - cred->security = current->cred->security; - + cred->security = current_security(); return 0; } @@ -1225,7 +1224,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->cred->security; + char *csp = current_security(); struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1450,7 +1449,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->cred->security; + msg->security = current_security(); return 0; } @@ -1486,7 +1485,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->cred->security; + isp->security = current_security(); return 0; } @@ -1595,7 +1594,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->cred->security; + isp->security = current_security(); return 0; } @@ -1699,7 +1698,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->cred->security; + kisp->security = current_security(); return 0; } @@ -1854,7 +1853,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->cred->security; + char *csp = current_security(); char *fetched; char *final; struct dentry *dp; @@ -2290,8 +2289,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->cred->security; - ssp->smk_out = current->cred->security; + ssp->smk_in = ssp->smk_out = current_security(); ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c5ca279e0506..ca257dfdc75d 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->cred->security); + audit_info.secid = smack_to_secid(current_security()); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->cred->security); + audit_info.secid = smack_to_secid(current_security()); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); -- cgit v1.2.3 From c69e8d9c01db2adc503464993c358901c9af9de4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:19 +1100 Subject: CRED: Use RCU to access another task's creds and to release a task's own creds Use RCU to access another task's creds and to release a task's own creds. This means that it will be possible for the credentials of a task to be replaced without another task (a) requiring a full lock to read them, and (b) seeing deallocated memory. Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/ia64/kernel/perfmon.c | 32 +++++++++++++--------- drivers/connector/cn_proc.c | 16 +++++++---- fs/binfmt_elf.c | 8 ++++-- fs/binfmt_elf_fdpic.c | 8 ++++-- fs/fcntl.c | 15 ++++++++--- fs/fuse/dir.c | 23 ++++++++++------ fs/ioprio.c | 14 +++++++--- fs/proc/array.c | 32 ++++++++++++++-------- fs/proc/base.c | 32 ++++++++++++++++------ include/linux/cred.h | 3 ++- kernel/auditsc.c | 33 +++++++++++++---------- kernel/cgroup.c | 16 +++++------ kernel/exit.c | 14 ++++++---- kernel/futex.c | 22 +++++++++------ kernel/futex_compat.c | 7 ++--- kernel/ptrace.c | 22 ++++++++------- kernel/sched.c | 31 ++++++++++++++------- kernel/signal.c | 49 ++++++++++++++++++++------------- kernel/sys.c | 11 +++++--- kernel/tsacct.c | 6 +++-- mm/mempolicy.c | 8 +++--- mm/migrate.c | 8 +++--- mm/oom_kill.c | 6 ++--- security/commoncap.c | 64 +++++++++++++++++++++++++++----------------- security/keys/permission.c | 10 ++++--- security/keys/process_keys.c | 24 ++++++++++------- security/selinux/selinuxfs.c | 13 ++++++--- security/smack/smack_lsm.c | 32 +++++++++++----------- 28 files changed, 355 insertions(+), 204 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index dd38db46a77a..0e499757309b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2399,25 +2399,33 @@ error_kmem: static int pfm_bad_permissions(struct task_struct *task) { + const struct cred *tcred; uid_t uid = current_uid(); gid_t gid = current_gid(); + int ret; + + rcu_read_lock(); + tcred = __task_cred(task); /* inspired by ptrace_attach() */ DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", uid, gid, - task->euid, - task->suid, - task->uid, - task->egid, - task->sgid)); - - return (uid != task->euid) - || (uid != task->suid) - || (uid != task->uid) - || (gid != task->egid) - || (gid != task->sgid) - || (gid != task->gid)) && !capable(CAP_SYS_PTRACE); + tcred->euid, + tcred->suid, + tcred->uid, + tcred->egid, + tcred->sgid)); + + ret = ((uid != tcred->euid) + || (uid != tcred->suid) + || (uid != tcred->uid) + || (gid != tcred->egid) + || (gid != tcred->sgid) + || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); + + rcu_read_unlock(); + return ret; } static int diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 354c1ff17159..c5afc98e2675 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -106,6 +106,7 @@ void proc_id_connector(struct task_struct *task, int which_id) struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE]; struct timespec ts; + const struct cred *cred; if (atomic_read(&proc_event_num_listeners) < 1) return; @@ -115,14 +116,19 @@ void proc_id_connector(struct task_struct *task, int which_id) ev->what = which_id; ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; + rcu_read_lock(); + cred = __task_cred(task); if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->cred->uid; - ev->event_data.id.e.euid = task->cred->euid; + ev->event_data.id.r.ruid = cred->uid; + ev->event_data.id.e.euid = cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->cred->gid; - ev->event_data.id.e.egid = task->cred->egid; - } else + ev->event_data.id.r.rgid = cred->gid; + ev->event_data.id.e.egid = cred->egid; + } else { + rcu_read_unlock(); return; + } + rcu_read_unlock(); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0e6655613169..9142ff5dc8e6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1361,6 +1361,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, struct mm_struct *mm) { + const struct cred *cred; unsigned int i, len; /* first copy the parameters from user space */ @@ -1388,8 +1389,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->cred->uid); - SET_GID(psinfo->pr_gid, p->cred->gid); + rcu_read_lock(); + cred = __task_cred(p); + SET_UID(psinfo->pr_uid, cred->uid); + SET_GID(psinfo->pr_gid, cred->gid); + rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1f6e8c023b4c..45dabd59936f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1414,6 +1414,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, struct mm_struct *mm) { + const struct cred *cred; unsigned int i, len; /* first copy the parameters from user space */ @@ -1441,8 +1442,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->cred->uid); - SET_GID(psinfo->pr_gid, p->cred->gid); + rcu_read_lock(); + cred = __task_cred(p); + SET_UID(psinfo->pr_uid, cred->uid); + SET_GID(psinfo->pr_gid, cred->gid); + rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/fcntl.c b/fs/fcntl.c index c594cc0e40fb..87c39f1f0817 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,10 +401,17 @@ static const long band_table[NSIGPOLL] = { static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { - return (((fown->euid == 0) || - (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || - (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && - !security_file_send_sigiotask(p, fown, sig)); + const struct cred *cred; + int ret; + + rcu_read_lock(); + cred = __task_cred(p); + ret = ((fown->euid == 0 || + fown->euid == cred->suid || fown->euid == cred->uid || + fown->uid == cred->suid || fown->uid == cred->uid) && + !security_file_send_sigiotask(p, fown, sig)); + rcu_read_unlock(); + return ret; } static void send_sigio_to_task(struct task_struct *p, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e97a98981862..95bc22bdd060 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -869,18 +869,25 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, */ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) { + const struct cred *cred; + int ret; + if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->cred->euid == fc->user_id && - task->cred->suid == fc->user_id && - task->cred->uid == fc->user_id && - task->cred->egid == fc->group_id && - task->cred->sgid == fc->group_id && - task->cred->gid == fc->group_id) - return 1; + rcu_read_lock(); + ret = 0; + cred = __task_cred(task); + if (cred->euid == fc->user_id && + cred->suid == fc->user_id && + cred->uid == fc->user_id && + cred->egid == fc->group_id && + cred->sgid == fc->group_id && + cred->gid == fc->group_id) + ret = 1; + rcu_read_unlock(); - return 0; + return ret; } static int fuse_access(struct inode *inode, int mask) diff --git a/fs/ioprio.c b/fs/ioprio.c index 5112554fd210..3569e0ad86a2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -31,10 +31,16 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) { int err; struct io_context *ioc; + const struct cred *cred = current_cred(), *tcred; - if (task->cred->uid != current_euid() && - task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) + rcu_read_lock(); + tcred = __task_cred(task); + if (tcred->uid != cred->euid && + tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); err = security_task_setioprio(task, ioprio); if (err) @@ -131,7 +137,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->cred->uid != who) + if (__task_cred(p)->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -224,7 +230,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->cred->uid != user->uid) + if (__task_cred(p)->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/proc/array.c b/fs/proc/array.c index 62fe9b2009b6..7e4877d9dcb5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct group_info *group_info; int g; struct fdtable *fdt = NULL; + const struct cred *cred; pid_t ppid, tpid; rcu_read_lock(); @@ -170,6 +171,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } + cred = get_cred((struct cred *) __task_cred(p)); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -182,8 +184,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, - p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); + cred->uid, cred->euid, cred->suid, cred->fsuid, + cred->gid, cred->egid, cred->sgid, cred->fsgid); task_lock(p); if (p->files) @@ -194,13 +196,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->cred->group_info; - get_group_info(group_info); + group_info = cred->group_info; task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) seq_printf(m, "%d ", GROUP_AT(group_info, g)); - put_group_info(group_info); + put_cred(cred); seq_printf(m, "\n"); } @@ -262,7 +263,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->cred->user->sigpending); + qsize = atomic_read(&__task_cred(p)->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,12 +294,21 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - struct cred *cred = p->cred; + const struct cred *cred; + kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset; - render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); - render_cap_t(m, "CapEff:\t", &cred->cap_effective); - render_cap_t(m, "CapBnd:\t", &cred->cap_bset); + rcu_read_lock(); + cred = __task_cred(p); + cap_inheritable = cred->cap_inheritable; + cap_permitted = cred->cap_permitted; + cap_effective = cred->cap_effective; + cap_bset = cred->cap_bset; + rcu_read_unlock(); + + render_cap_t(m, "CapInh:\t", &cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cap_permitted); + render_cap_t(m, "CapEff:\t", &cap_effective); + render_cap_t(m, "CapBnd:\t", &cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 6862b360c36c..cf42c42cbfbb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1406,6 +1406,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st { struct inode * inode; struct proc_inode *ei; + const struct cred *cred; /* We need a new inode */ @@ -1428,8 +1429,11 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } security_task_to_inode(task, inode); @@ -1445,6 +1449,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat { struct inode *inode = dentry->d_inode; struct task_struct *task; + const struct cred *cred; + generic_fillattr(inode, stat); rcu_read_lock(); @@ -1454,8 +1460,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->cred->euid; - stat->gid = task->cred->egid; + cred = __task_cred(task); + stat->uid = cred->euid; + stat->gid = cred->egid; } } rcu_read_unlock(); @@ -1483,11 +1490,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); + const struct cred *cred; + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1649,6 +1661,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) struct task_struct *task = get_proc_task(inode); int fd = proc_fd(inode); struct files_struct *files; + const struct cred *cred; if (task) { files = get_files_struct(task); @@ -1658,8 +1671,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/include/linux/cred.h b/include/linux/cred.h index 4221ec6000c1..166ce4ddba64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -147,8 +147,9 @@ static inline struct cred *get_cred(struct cred *cred) * Release a reference to a set of credentials, deleting them when the last ref * is released. */ -static inline void put_cred(struct cred *cred) +static inline void put_cred(const struct cred *_cred) { + struct cred *cred = (struct cred *) _cred; if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2febf5165fad..ae8ef88ade3f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,7 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { - struct cred *cred = tsk->cred; + const struct cred *cred = get_task_cred(tsk); int i, j, need_sid = 1; u32 sid; @@ -642,8 +642,10 @@ static int audit_filter_rules(struct task_struct *tsk, break; } - if (!result) + if (!result) { + put_cred(cred); return 0; + } } if (rule->filterkey && ctx) ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); @@ -651,6 +653,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_NEVER: *state = AUDIT_DISABLED; break; case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; } + put_cred(cred); return 1; } @@ -1229,7 +1232,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - struct cred *cred = tsk->cred; + const struct cred *cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1239,13 +1242,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if (!context->ppid) context->ppid = sys_getppid(); - context->uid = cred->uid; - context->gid = cred->gid; - context->euid = cred->euid; - context->suid = cred->suid; + cred = current_cred(); + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; context->fsuid = cred->fsuid; - context->egid = cred->egid; - context->sgid = cred->sgid; + context->egid = cred->egid; + context->sgid = cred->sgid; context->fsgid = cred->fsgid; context->personality = tsk->personality; @@ -2088,7 +2092,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->cred->uid, + task->pid, task_uid(task), task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2471,7 +2475,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->cred->uid; + context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2490,6 +2494,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_aux_data_pids *axp; struct task_struct *tsk = current; struct audit_context *ctx = tsk->audit_context; + uid_t uid = current_uid(), t_uid = task_uid(t); if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { @@ -2497,7 +2502,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->cred->uid; + audit_sig_uid = uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2509,7 +2514,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->cred->uid; + ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2530,7 +2535,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->cred->uid; + axp->target_uid[axp->pid_count] = t_uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e210526e6401..a512a75a5560 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1279,7 +1279,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { struct task_struct *tsk; - uid_t euid; + const struct cred *cred = current_cred(), *tcred; int ret; if (pid) { @@ -1289,16 +1289,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); return -ESRCH; } - get_task_struct(tsk); - rcu_read_unlock(); - euid = current_euid(); - if (euid && - euid != tsk->cred->uid && - euid != tsk->cred->suid) { - put_task_struct(tsk); + tcred = __task_cred(tsk); + if (cred->euid && + cred->euid != tcred->uid && + cred->euid != tcred->suid) { + rcu_read_unlock(); return -EACCES; } + get_task_struct(tsk); + rcu_read_unlock(); } else { tsk = current; get_task_struct(tsk); diff --git a/kernel/exit.c b/kernel/exit.c index e0f6e1892fb9..bbc22530f2c1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,10 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->cred->user->processes); + /* don't need to get the RCU readlock here - the process is dead and + * can't be modifying its own credentials */ + atomic_dec(&__task_cred(p)->user->processes); + proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1267,12 +1270,12 @@ static int wait_task_zombie(struct task_struct *p, int options, unsigned long state; int retval, status, traced; pid_t pid = task_pid_vnr(p); + uid_t uid = __task_cred(p)->uid; if (!likely(options & WEXITED)) return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1396,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->cred->uid, &infop->si_uid); + retval = put_user(uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1461,8 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->cred->uid; + /* don't need the RCU readlock here as we're holding a spinlock */ + uid = __task_cred(p)->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1532,10 +1536,10 @@ static int wait_task_continued(struct task_struct *p, int options, } if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; + uid = __task_cred(p)->uid; spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/futex.c b/kernel/futex.c index 28421d8210b8..4fe790e89d0f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -439,15 +439,20 @@ static void free_pi_state(struct futex_pi_state *pi_state) static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->cred->euid && - euid != p->cred->uid)) + if (!p) { p = ERR_PTR(-ESRCH); - else - get_task_struct(p); + } else { + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid) + p = ERR_PTR(-ESRCH); + else + get_task_struct(p); + } rcu_read_unlock(); @@ -1831,7 +1836,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, { struct robust_list_head __user *head; unsigned long ret; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1847,8 +1852,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 2c3fd5ed34f5..d607a5b9ee29 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -135,7 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49849d12dd12..b9d5f4e4f6a4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,7 +115,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { - struct cred *cred = current->cred, *tcred = task->cred; + const struct cred *cred = current_cred(), *tcred; /* May we inspect the given task? * This check is used both for attaching with ptrace @@ -125,19 +125,23 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - uid_t uid = cred->uid; - gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - if ((uid != tcred->euid || - uid != tcred->suid || - uid != tcred->uid || - gid != tcred->egid || - gid != tcred->sgid || - gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) + rcu_read_lock(); + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_PTRACE)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); diff --git a/kernel/sched.c b/kernel/sched.c index 733c59e645aa..92992e287b10 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,9 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->cred->user->tg; + rcu_read_lock(); + tg = __task_cred(p)->user->tg; + rcu_read_unlock(); #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5121,6 +5123,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) set_load_weight(p); } +/* + * check the target process has a UID that matches the current process's + */ +static bool check_same_owner(struct task_struct *p) +{ + const struct cred *cred = current_cred(), *pcred; + bool match; + + rcu_read_lock(); + pcred = __task_cred(p); + match = (cred->euid == pcred->euid || + cred->euid == pcred->uid); + rcu_read_unlock(); + return match; +} + static int __sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param, bool user) { @@ -5128,7 +5146,6 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; - uid_t euid; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -5181,9 +5198,7 @@ recheck: return -EPERM; /* can't change other user's priorities */ - euid = current_euid(); - if (euid != p->cred->euid && - euid != p->cred->uid) + if (!check_same_owner(p)) return -EPERM; } @@ -5394,7 +5409,6 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) cpumask_t cpus_allowed; cpumask_t new_mask = *in_mask; struct task_struct *p; - uid_t euid; int retval; get_online_cpus(); @@ -5415,11 +5429,8 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - euid = current_euid(); retval = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && - !capable(CAP_SYS_NICE)) + if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 80e8a6489f97..84989124bafb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -177,6 +177,11 @@ int next_signal(struct sigpending *pending, sigset_t *mask) return sig; } +/* + * allocate a new signal queue record + * - this may be called without locks if and only if t == current, otherwise an + * appopriate lock must be held to protect t's user_struct + */ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { @@ -184,11 +189,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, struct user_struct *user; /* - * In order to avoid problems with "switch_user()", we want to make - * sure that the compiler doesn't re-load "t->user" + * We won't get problems with the target's UID changing under us + * because changing it requires RCU be used, and if t != current, the + * caller must be holding the RCU readlock (by way of a spinlock) and + * we use RCU protection here */ - user = t->cred->user; - barrier(); + user = __task_cred(t)->user; atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -562,12 +568,13 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s) /* * Bad permissions for sending the signal + * - the caller must hold at least the RCU read lock */ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { + const struct cred *cred = current_cred(), *tcred; struct pid *sid; - uid_t uid, euid; int error; if (!valid_signal(sig)) @@ -580,10 +587,11 @@ static int check_kill_permission(int sig, struct siginfo *info, if (error) return error; - uid = current_uid(); - euid = current_euid(); - if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && - (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && + tcred = __task_cred(t); + if ((cred->euid ^ tcred->suid) && + (cred->euid ^ tcred->uid) && + (cred->uid ^ tcred->suid) && + (cred->uid ^ tcred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1011,6 +1019,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long return sighand; } +/* + * send signal info to all the members of a group + * - the caller must hold the RCU read lock at least + */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; @@ -1032,8 +1044,8 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) /* * __kill_pgrp_info() sends a signal to a process group: this is what the tty * control characters do (^C, ^Z etc) + * - the caller must hold at least a readlock on tasklist_lock */ - int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) { struct task_struct *p = NULL; @@ -1089,6 +1101,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, { int ret = -EINVAL; struct task_struct *p; + const struct cred *pcred; if (!valid_signal(sig)) return ret; @@ -1099,9 +1112,11 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, ret = -ESRCH; goto out_unlock; } - if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->cred->suid) && (euid != p->cred->uid) - && (uid != p->cred->suid) && (uid != p->cred->uid)) { + pcred = __task_cred(p); + if ((info == SEND_SIG_NOINFO || + (!is_si_special(info) && SI_FROMUSER(info))) && + euid != pcred->suid && euid != pcred->uid && + uid != pcred->suid && uid != pcred->uid) { ret = -EPERM; goto out_unlock; } @@ -1372,10 +1387,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_uid = tsk->cred->uid; - thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); info.si_stime = cputime_to_jiffies(cputime.stime); @@ -1443,10 +1457,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_uid = tsk->cred->uid; - info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1726,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->cred->uid; + info->si_uid = task_uid(current->parent); } /* If the (new) signal is now blocked, requeue it. */ diff --git a/kernel/sys.c b/kernel/sys.c index c4d6b59553e9..ccc9eb736d35 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -112,14 +112,17 @@ EXPORT_SYMBOL(cad_pid); void (*pm_power_off_prepare)(void); +/* + * set the priority of a task + * - the caller must hold the RCU read lock + */ static int set_one_prio(struct task_struct *p, int niceval, int error) { - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred = __task_cred(p); int no_nice; - if (p->cred->uid != euid && - p->cred->euid != euid && - !capable(CAP_SYS_NICE)) { + if (pcred->uid != cred->euid && + pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 6d1ed07bf312..2dc06ab35716 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -27,6 +27,7 @@ */ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) { + const struct cred *tcred; struct timespec uptime, ts; u64 ac_etime; @@ -53,10 +54,11 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->cred->uid; - stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); + tcred = __task_cred(tsk); + stats->ac_uid = tcred->uid; + stats->ac_gid = tcred->gid; stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b23492ee3e50..7555219c535b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,7 +1110,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { - struct cred *cred, *tcred; + const struct cred *cred = current_cred(), *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; @@ -1145,14 +1145,16 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - cred = current->cred; - tcred = task->cred; + rcu_read_lock(); + tcred = __task_cred(task); if (cred->euid != tcred->suid && cred->euid != tcred->uid && cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ diff --git a/mm/migrate.c b/mm/migrate.c index 794443da1b4f..142284229ce2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,7 +1045,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { - struct cred *cred, *tcred; + const struct cred *cred = current_cred(), *tcred; struct task_struct *task; struct mm_struct *mm; int err; @@ -1076,14 +1076,16 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - cred = current->cred; - tcred = task->cred; + rcu_read_lock(); + tcred = __task_cred(task); if (cred->euid != tcred->suid && cred->euid != tcred->uid && cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); err = security_task_movememory(task); if (err) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3af787ba2077..0e0b282a2073 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,9 +298,9 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->cred->uid, p->tgid, p->mm->total_vm, - get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, - p->comm); + p->pid, __task_cred(p)->uid, p->tgid, + p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), + p->oomkilladj, p->comm); task_unlock(p); } while_each_thread(g, p); } diff --git a/security/commoncap.c b/security/commoncap.c index 61307f590003..0384bf95db68 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -51,10 +51,13 @@ EXPORT_SYMBOL(cap_netlink_recv); */ int cap_capable(struct task_struct *tsk, int cap, int audit) { + __u32 cap_raised; + /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cred->cap_effective, cap)) - return 0; - return -EPERM; + rcu_read_lock(); + cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap); + rcu_read_unlock(); + return cap_raised ? 0 : -EPERM; } int cap_settime(struct timespec *ts, struct timezone *tz) @@ -66,34 +69,42 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(child->cred->cap_permitted, - current->cred->cap_permitted)) - return 0; - if (capable(CAP_SYS_PTRACE)) - return 0; - return -EPERM; + int ret = 0; + + rcu_read_lock(); + if (!cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted) && + !capable(CAP_SYS_PTRACE)) + ret = -EPERM; + rcu_read_unlock(); + return ret; } int cap_ptrace_traceme(struct task_struct *parent) { - if (cap_issubset(current->cred->cap_permitted, - parent->cred->cap_permitted)) - return 0; - if (has_capability(parent, CAP_SYS_PTRACE)) - return 0; - return -EPERM; + int ret = 0; + + rcu_read_lock(); + if (!cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted) && + !has_capability(parent, CAP_SYS_PTRACE)) + ret = -EPERM; + rcu_read_unlock(); + return ret; } int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - struct cred *cred = target->cred; + const struct cred *cred; /* Derived from kernel/capability.c:sys_capget. */ + rcu_read_lock(); + cred = __task_cred(target); *effective = cred->cap_effective; *inheritable = cred->cap_inheritable; *permitted = cred->cap_permitted; + rcu_read_unlock(); return 0; } @@ -433,7 +444,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) int cap_bprm_secureexec (struct linux_binprm *bprm) { - const struct cred *cred = current->cred; + const struct cred *cred = current_cred(); if (cred->uid != 0) { if (bprm->cap_effective) @@ -511,11 +522,11 @@ static inline void cap_emulate_setxuid (int old_ruid, int old_euid, if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (cred->cap_permitted); - cap_clear (cred->cap_effective); + cap_clear(cred->cap_permitted); + cap_clear(cred->cap_effective); } if (old_euid == 0 && cred->euid != 0) { - cap_clear (cred->cap_effective); + cap_clear(cred->cap_effective); } if (old_euid != 0 && cred->euid == 0) { cred->cap_effective = cred->cap_permitted; @@ -582,9 +593,14 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cred->cap_permitted, - current->cred->cap_permitted) && - !capable(CAP_SYS_NICE)) + int is_subset; + + rcu_read_lock(); + is_subset = cap_issubset(__task_cred(p)->cap_permitted, + current_cred()->cap_permitted); + rcu_read_unlock(); + + if (!is_subset && !capable(CAP_SYS_NICE)) return -EPERM; return 0; } diff --git a/security/keys/permission.c b/security/keys/permission.c index baf3d5f31e71..13c36164f284 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,13 +22,16 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { - struct cred *cred = context->cred; + const struct cred *cred; struct key *key; key_perm_t kperm; int ret; key = key_ref_to_ptr(key_ref); + rcu_read_lock(); + cred = __task_cred(context); + /* use the second 8-bits of permissions for keys the caller owns */ if (key->uid == cred->fsuid) { kperm = key->perm >> 16; @@ -43,10 +46,7 @@ int key_task_permission(const key_ref_t key_ref, goto use_these_perms; } - spin_lock(&cred->lock); ret = groups_search(cred->group_info, key->gid); - spin_unlock(&cred->lock); - if (ret) { kperm = key->perm >> 8; goto use_these_perms; @@ -57,6 +57,8 @@ int key_task_permission(const key_ref_t key_ref, kperm = key->perm; use_these_perms: + rcu_read_lock(); + /* use the top 8-bits of permissions for keys the caller possesses * - possessor permissions are additive with other permissions */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index ce8ac6073d57..212601ebaa46 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -412,10 +412,13 @@ key_ref_t search_process_keyrings(struct key_type *type, struct task_struct *context) { struct request_key_auth *rka; + struct cred *cred; key_ref_t key_ref, ret, err; might_sleep(); + cred = get_task_cred(context); + /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if @@ -428,9 +431,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->cred->thread_keyring) { + if (cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->cred->thread_keyring, 1), + make_key_ref(cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -495,9 +498,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (context->cred->user->session_keyring) { + else if (cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->cred->user->session_keyring, 1), + make_key_ref(cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -519,20 +522,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->cred->request_key_auth && + if (cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->cred->request_key_auth->sem); + down_read(&cred->request_key_auth->sem); - if (key_validate(context->cred->request_key_auth) == 0) { - rka = context->cred->request_key_auth->payload.data; + if (key_validate(cred->request_key_auth) == 0) { + rka = cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -549,7 +552,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); } } @@ -557,6 +560,7 @@ key_ref_t search_process_keyrings(struct key_type *type, key_ref = ret ? ret : err; found: + put_cred(cred); return key_ref; } /* end search_process_keyrings() */ diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 10715d1330b9..c86303638235 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -95,13 +95,18 @@ extern void selnl_notify_setenforce(int val); static int task_has_security(struct task_struct *tsk, u32 perms) { - struct task_security_struct *tsec; - - tsec = tsk->cred->security; + const struct task_security_struct *tsec; + u32 sid = 0; + + rcu_read_lock(); + tsec = __task_cred(tsk)->security; + if (tsec) + sid = tsec->sid; + rcu_read_unlock(); if (!tsec) return -EACCES; - return avc_has_perm(tsec->sid, SECINITSID_SECURITY, + return avc_has_perm(sid, SECINITSID_SECURITY, SECCLASS_SECURITY, perms, NULL); } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e8a4fcb1ad04..11167fd567b9 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -30,6 +30,8 @@ #include "smack.h" +#define task_security(task) (task_cred_xxx((task), security)) + /* * I hope these are the hokeyist lines of code in the module. Casey. */ @@ -1012,7 +1014,7 @@ static void smack_cred_free(struct cred *cred) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); } /** @@ -1023,7 +1025,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1034,7 +1036,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1046,7 +1048,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->cred->security); + *secid = smack_to_secid(task_security(p)); } /** @@ -1062,7 +1064,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1079,7 +1081,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1091,7 +1093,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1109,7 +1111,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1121,7 +1123,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1132,7 +1134,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); } /** @@ -1155,13 +1157,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE); } /** @@ -1174,7 +1176,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); + rc = smk_access(current_security(), task_security(p), MAY_WRITE); if (rc == 0) return 0; @@ -1205,7 +1207,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->cred->security; + isp->smk_inode = task_security(p); } /* @@ -2010,7 +2012,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->cred->security, GFP_KERNEL); + cp = kstrdup(task_security(p), GFP_KERNEL); if (cp == NULL) return -ENOMEM; -- cgit v1.2.3 From bb952bb98a7e479262c7eb25d5592545a3af147d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:20 +1100 Subject: CRED: Separate per-task-group keyrings from signal_struct Separate per-task-group keyrings from signal_struct and dangle their anchor from the cred struct rather than the signal_struct. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/cred.h | 16 +++++++ include/linux/key.h | 8 +--- include/linux/sched.h | 6 --- kernel/cred.c | 63 +++++++++++++++++++++++++++ kernel/fork.c | 7 --- security/keys/process_keys.c | 100 +++++++++++++++++-------------------------- security/keys/request_key.c | 34 ++++++--------- 7 files changed, 135 insertions(+), 99 deletions(-) (limited to 'kernel') diff --git a/include/linux/cred.h b/include/linux/cred.h index 166ce4ddba64..62b9e532422d 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -71,6 +71,21 @@ extern int groups_search(const struct group_info *, gid_t); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); +/* + * The common credentials for a thread group + * - shared by CLONE_THREAD + */ +#ifdef CONFIG_KEYS +struct thread_group_cred { + atomic_t usage; + pid_t tgid; /* thread group process ID */ + spinlock_t lock; + struct key *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ + struct rcu_head rcu; /* RCU deletion hook */ +}; +#endif + /* * The security context of a task * @@ -114,6 +129,7 @@ struct cred { * keys to */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ + struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ diff --git a/include/linux/key.h b/include/linux/key.h index df709e1af3cd..0836cc838b0c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,9 +278,7 @@ extern ctl_table key_sysctls[]; */ extern void switch_uid_keyring(struct user_struct *new_user); extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern int copy_thread_group_keys(struct task_struct *tsk); extern void exit_keys(struct task_struct *tsk); -extern void exit_thread_group_keys(struct signal_struct *tg); extern int suid_keys(struct task_struct *tsk); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); @@ -289,8 +287,8 @@ extern void key_init(void); #define __install_session_keyring(keyring) \ ({ \ - struct key *old_session = current->signal->session_keyring; \ - current->signal->session_keyring = keyring; \ + struct key *old_session = current->cred->tgcred->session_keyring; \ + current->cred->tgcred->session_keyring = keyring; \ old_session; \ }) @@ -308,9 +306,7 @@ extern void key_init(void); #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 -#define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) -#define exit_thread_group_keys(tg) do { } while(0) #define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 740cf946c8cc..2913252989b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -571,12 +571,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - /* keep the process-shared keyrings here so that they do the right - * thing in threads created with CLONE_THREAD */ -#ifdef CONFIG_KEYS - struct key *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ -#endif #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif diff --git a/kernel/cred.c b/kernel/cred.c index 833244a7cb05..ac73e3617684 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -16,6 +16,17 @@ #include #include +/* + * The common credentials for the initial task's thread group + */ +#ifdef CONFIG_KEYS +static struct thread_group_cred init_tgcred = { + .usage = ATOMIC_INIT(2), + .tgid = 0, + .lock = SPIN_LOCK_UNLOCKED, +}; +#endif + /* * The initial credentials for the initial task */ @@ -28,8 +39,41 @@ struct cred init_cred = { .cap_bset = CAP_INIT_BSET, .user = INIT_USER, .group_info = &init_groups, +#ifdef CONFIG_KEYS + .tgcred = &init_tgcred, +#endif }; +/* + * Dispose of the shared task group credentials + */ +#ifdef CONFIG_KEYS +static void release_tgcred_rcu(struct rcu_head *rcu) +{ + struct thread_group_cred *tgcred = + container_of(rcu, struct thread_group_cred, rcu); + + BUG_ON(atomic_read(&tgcred->usage) != 0); + + key_put(tgcred->session_keyring); + key_put(tgcred->process_keyring); + kfree(tgcred); +} +#endif + +/* + * Release a set of thread group credentials. + */ +static void release_tgcred(struct cred *cred) +{ +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = cred->tgcred; + + if (atomic_dec_and_test(&tgcred->usage)) + call_rcu(&tgcred->rcu, release_tgcred_rcu); +#endif +} + /* * The RCU callback to actually dispose of a set of credentials */ @@ -41,6 +85,7 @@ static void put_cred_rcu(struct rcu_head *rcu) key_put(cred->thread_keyring); key_put(cred->request_key_auth); + release_tgcred(cred); put_group_info(cred->group_info); free_uid(cred->user); security_cred_free(cred); @@ -71,12 +116,30 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!pcred) return -ENOMEM; +#ifdef CONFIG_KEYS + if (clone_flags & CLONE_THREAD) { + atomic_inc(&pcred->tgcred->usage); + } else { + pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL); + if (!pcred->tgcred) { + kfree(pcred); + return -ENOMEM; + } + atomic_set(&pcred->tgcred->usage, 1); + spin_lock_init(&pcred->tgcred->lock); + pcred->tgcred->process_keyring = NULL; + pcred->tgcred->session_keyring = + key_get(p->cred->tgcred->session_keyring); + } +#endif + #ifdef CONFIG_SECURITY pcred->security = NULL; #endif ret = security_cred_alloc(pcred); if (ret < 0) { + release_tgcred(pcred); kfree(pcred); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index c932e283ddfc..ded1972672a3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -802,12 +802,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) if (!sig) return -ENOMEM; - ret = copy_thread_group_keys(tsk); - if (ret < 0) { - kmem_cache_free(signal_cachep, sig); - return ret; - } - atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); @@ -852,7 +846,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_signal(struct signal_struct *sig) { thread_group_cputime_free(sig); - exit_thread_group_keys(sig); tty_kref_put(sig->tty); kmem_cache_free(signal_cachep, sig); } diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 212601ebaa46..70ee93406f30 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -189,7 +189,7 @@ int install_process_keyring(void) might_sleep(); - if (!tsk->signal->process_keyring) { + if (!tsk->cred->tgcred->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, @@ -200,12 +200,12 @@ int install_process_keyring(void) } /* attach keyring */ - spin_lock_irq(&tsk->sighand->siglock); - if (!tsk->signal->process_keyring) { - tsk->signal->process_keyring = keyring; + spin_lock_irq(&tsk->cred->tgcred->lock); + if (!tsk->cred->tgcred->process_keyring) { + tsk->cred->tgcred->process_keyring = keyring; keyring = NULL; } - spin_unlock_irq(&tsk->sighand->siglock); + spin_unlock_irq(&tsk->cred->tgcred->lock); key_put(keyring); } @@ -235,11 +235,11 @@ static int install_session_keyring(struct key *keyring) sprintf(buf, "_ses.%u", tsk->tgid); flags = KEY_ALLOC_QUOTA_OVERRUN; - if (tsk->signal->session_keyring) + if (tsk->cred->tgcred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - flags, NULL); + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, + tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } @@ -248,10 +248,10 @@ static int install_session_keyring(struct key *keyring) } /* install the keyring */ - spin_lock_irq(&tsk->sighand->siglock); - old = tsk->signal->session_keyring; - rcu_assign_pointer(tsk->signal->session_keyring, keyring); - spin_unlock_irq(&tsk->sighand->siglock); + spin_lock_irq(&tsk->cred->tgcred->lock); + old = tsk->cred->tgcred->session_keyring; + rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring); + spin_unlock_irq(&tsk->cred->tgcred->lock); /* we're using RCU on the pointer, but there's no point synchronising * on it if it didn't previously point to anything */ @@ -264,28 +264,6 @@ static int install_session_keyring(struct key *keyring) } /* end install_session_keyring() */ -/*****************************************************************************/ -/* - * copy the keys in a thread group for fork without CLONE_THREAD - */ -int copy_thread_group_keys(struct task_struct *tsk) -{ - key_check(current->thread_group->session_keyring); - key_check(current->thread_group->process_keyring); - - /* no process keyring yet */ - tsk->signal->process_keyring = NULL; - - /* same session keyring */ - rcu_read_lock(); - tsk->signal->session_keyring = - key_get(rcu_dereference(current->signal->session_keyring)); - rcu_read_unlock(); - - return 0; - -} /* end copy_thread_group_keys() */ - /*****************************************************************************/ /* * copy the keys for fork @@ -305,17 +283,6 @@ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) } /* end copy_keys() */ -/*****************************************************************************/ -/* - * dispose of thread group keys upon thread group destruction - */ -void exit_thread_group_keys(struct signal_struct *tg) -{ - key_put(tg->session_keyring); - key_put(tg->process_keyring); - -} /* end exit_thread_group_keys() */ - /*****************************************************************************/ /* * dispose of per-thread keys upon thread exit @@ -344,10 +311,10 @@ int exec_keys(struct task_struct *tsk) key_put(old); /* discard the process keyring from a newly exec'd task */ - spin_lock_irq(&tsk->sighand->siglock); - old = tsk->signal->process_keyring; - tsk->signal->process_keyring = NULL; - spin_unlock_irq(&tsk->sighand->siglock); + spin_lock_irq(&tsk->cred->tgcred->lock); + old = tsk->cred->tgcred->process_keyring; + tsk->cred->tgcred->process_keyring = NULL; + spin_unlock_irq(&tsk->cred->tgcred->lock); key_put(old); @@ -452,9 +419,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (context->signal->process_keyring) { + if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->signal->process_keyring, 1), + make_key_ref(cred->tgcred->process_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -473,11 +440,11 @@ key_ref_t search_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (context->signal->session_keyring) { + if (cred->tgcred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( make_key_ref(rcu_dereference( - context->signal->session_keyring), + cred->tgcred->session_keyring), 1), context, type, description, match); rcu_read_unlock(); @@ -586,11 +553,13 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - struct cred *cred = current_cred(); + struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; +try_again: + cred = get_current_cred(); key_ref = ERR_PTR(-ENOKEY); switch (id) { @@ -604,6 +573,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key = ERR_PTR(ret); goto error; } + goto reget_creds; } key = cred->thread_keyring; @@ -612,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_PROCESS_KEYRING: - if (!t->signal->process_keyring) { + if (!cred->tgcred->process_keyring) { if (!create) goto error; @@ -621,15 +591,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key = ERR_PTR(ret); goto error; } + goto reget_creds; } - key = t->signal->process_keyring; + key = cred->tgcred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!t->signal->session_keyring) { + if (!cred->tgcred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -639,10 +610,11 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, cred->user->session_keyring); if (ret < 0) goto error; + goto reget_creds; } rcu_read_lock(); - key = rcu_dereference(t->signal->session_keyring); + key = rcu_dereference(cred->tgcred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -758,6 +730,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto invalid_key; error: + put_cred(cred); return key_ref; invalid_key: @@ -765,6 +738,12 @@ invalid_key: key_ref = ERR_PTR(ret); goto error; + /* if we attempted to install a keyring, then it may have caused new + * creds to be installed */ +reget_creds: + put_cred(cred); + goto try_again; + } /* end lookup_user_key() */ /*****************************************************************************/ @@ -777,6 +756,7 @@ invalid_key: long join_session_keyring(const char *name) { struct task_struct *tsk = current; + struct cred *cred = current->cred; struct key *keyring; long ret; @@ -787,7 +767,7 @@ long join_session_keyring(const char *name) goto error; rcu_read_lock(); - ret = rcu_dereference(tsk->signal->session_keyring)->serial; + ret = rcu_dereference(cred->tgcred->session_keyring)->serial; rcu_read_unlock(); goto error; } @@ -799,7 +779,7 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, + keyring = keyring_alloc(name, cred->uid, cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 0488b0af5bd6..3d12558362df 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -66,7 +66,6 @@ static int call_sbin_request_key(struct key_construction *cons, const char *op, void *aux) { - struct task_struct *tsk = current; const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = cons->key, *authkey = cons->authkey, *keyring; @@ -109,18 +108,13 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring->serial : 0); prkey = 0; - if (tsk->signal->process_keyring) - prkey = tsk->signal->process_keyring->serial; + if (cred->tgcred->process_keyring) + prkey = cred->tgcred->process_keyring->serial; - sprintf(keyring_str[1], "%d", prkey); - - if (tsk->signal->session_keyring) { - rcu_read_lock(); - sskey = rcu_dereference(tsk->signal->session_keyring)->serial; - rcu_read_unlock(); - } else { + if (cred->tgcred->session_keyring) + sskey = rcu_dereference(cred->tgcred->session_keyring)->serial; + else sskey = cred->user->session_keyring->serial; - } sprintf(keyring_str[2], "%d", sskey); @@ -222,7 +216,7 @@ static int construct_key(struct key *key, const void *callout_info, static void construct_get_dest_keyring(struct key **_dest_keyring) { struct request_key_auth *rka; - struct task_struct *tsk = current; + const struct cred *cred = current_cred(); struct key *dest_keyring = *_dest_keyring, *authkey; kenter("%p", dest_keyring); @@ -234,11 +228,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->cred->jit_keyring) { + switch (cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->cred->request_key_auth) { - authkey = tsk->cred->request_key_auth; + if (cred->request_key_auth) { + authkey = cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -251,19 +245,19 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->cred->thread_keyring); + dest_keyring = key_get(cred->thread_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(tsk->signal->process_keyring); + dest_keyring = key_get(cred->tgcred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(tsk->signal->session_keyring)); + rcu_dereference(cred->tgcred->session_keyring)); rcu_read_unlock(); if (dest_keyring) @@ -271,11 +265,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: dest_keyring = - key_get(tsk->cred->user->session_keyring); + key_get(cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->cred->user->uid_keyring); + dest_keyring = key_get(cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: -- cgit v1.2.3 From 6cc88bc45ce8043171089c9592da223dfab91823 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:21 +1100 Subject: CRED: Rename is_single_threaded() to is_wq_single_threaded() Rename is_single_threaded() to is_wq_single_threaded() so that a new is_single_threaded() can be created that refers to tasks rather than waitqueues. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- kernel/workqueue.c | 8 ++++---- lib/is_single_threaded.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 lib/is_single_threaded.c (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f928f2a87b9b..f12ab5c4dec4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -84,21 +84,21 @@ static cpumask_t cpu_singlethread_map __read_mostly; static cpumask_t cpu_populated_map __read_mostly; /* If it's single threaded, it isn't in the list of workqueues. */ -static inline int is_single_threaded(struct workqueue_struct *wq) +static inline int is_wq_single_threaded(struct workqueue_struct *wq) { return wq->singlethread; } static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq) { - return is_single_threaded(wq) + return is_wq_single_threaded(wq) ? &cpu_singlethread_map : &cpu_populated_map; } static struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu) { - if (unlikely(is_single_threaded(wq))) + if (unlikely(is_wq_single_threaded(wq))) cpu = singlethread_cpu; return per_cpu_ptr(wq->cpu_wq, cpu); } @@ -769,7 +769,7 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct workqueue_struct *wq = cwq->wq; - const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; + const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d"; struct task_struct *p; p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu); diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c new file mode 100644 index 000000000000..f1ed2fe76c65 --- /dev/null +++ b/lib/is_single_threaded.c @@ -0,0 +1,45 @@ +/* Function to determine if a thread group is single threaded or not + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * - Derived from security/selinux/hooks.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include + +/** + * is_single_threaded - Determine if a thread group is single-threaded or not + * @p: A task in the thread group in question + * + * This returns true if the thread group to which a task belongs is single + * threaded, false if it is not. + */ +bool is_single_threaded(struct task_struct *p) +{ + struct task_struct *g, *t; + struct mm_struct *mm = p->mm; + + if (atomic_read(&p->signal->count) != 1) + goto no; + + if (atomic_read(&p->mm->mm_users) != 1) { + read_lock(&tasklist_lock); + do_each_thread(g, t) { + if (t->mm == mm && t != p) + goto no_unlock; + } while_each_thread(g, t); + read_unlock(&tasklist_lock); + } + + return true; + +no_unlock: + read_unlock(&tasklist_lock); +no: + return false; +} -- cgit v1.2.3 From d84f4f992cbd76e8f39c488cf0c5d123843923b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:23 +1100 Subject: CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: James Morris --- fs/exec.c | 31 ++- fs/nfsd/auth.c | 92 ++++---- fs/nfsd/nfs4recover.c | 68 +++--- fs/nfsd/nfsfh.c | 11 +- fs/open.c | 31 ++- include/linux/audit.h | 22 +- include/linux/capability.h | 2 - include/linux/cred.h | 44 +++- include/linux/init_task.h | 2 + include/linux/key.h | 22 +- include/linux/sched.h | 6 +- include/linux/security.h | 178 +++++++--------- init/main.c | 1 + kernel/auditsc.c | 42 ++-- kernel/capability.c | 78 +++---- kernel/cred-internals.h | 21 ++ kernel/cred.c | 321 ++++++++++++++++++++++++---- kernel/exit.c | 9 +- kernel/fork.c | 7 +- kernel/kmod.c | 30 ++- kernel/ptrace.c | 9 + kernel/signal.c | 10 +- kernel/sys.c | 450 +++++++++++++++++++++------------------ kernel/user.c | 37 +--- kernel/user_namespace.c | 12 +- lib/Makefile | 2 +- net/rxrpc/ar-key.c | 6 +- security/capability.c | 21 +- security/commoncap.c | 265 +++++++++++------------ security/keys/internal.h | 17 +- security/keys/key.c | 25 +-- security/keys/keyctl.c | 95 ++++++--- security/keys/keyring.c | 14 +- security/keys/permission.c | 24 ++- security/keys/proc.c | 8 +- security/keys/process_keys.c | 333 ++++++++++++++--------------- security/keys/request_key.c | 29 ++- security/keys/request_key_auth.c | 41 ++-- security/security.c | 58 +++-- security/selinux/hooks.c | 286 ++++++++++++------------- security/smack/smack_lsm.c | 82 ++++--- 41 files changed, 1603 insertions(+), 1239 deletions(-) create mode 100644 kernel/cred-internals.h (limited to 'kernel') diff --git a/fs/exec.c b/fs/exec.c index a5330e1a2216..9bd3559ddece 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1007,13 +1007,12 @@ int flush_old_exec(struct linux_binprm * bprm) */ current->mm->task_size = TASK_SIZE; - if (bprm->e_uid != current_euid() || bprm->e_gid != current_egid()) { - suid_keys(current); + if (bprm->e_uid != current_euid() || + bprm->e_gid != current_egid()) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { - suid_keys(current); set_dumpable(current->mm, suid_dumpable); } @@ -1096,10 +1095,8 @@ void compute_creds(struct linux_binprm *bprm) { int unsafe; - if (bprm->e_uid != current_uid()) { - suid_keys(current); + if (bprm->e_uid != current_uid()) current->pdeath_signal = 0; - } exec_keys(current); task_lock(current); @@ -1709,8 +1706,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct linux_binfmt * binfmt; struct inode * inode; struct file * file; + const struct cred *old_cred; + struct cred *cred; int retval = 0; - int fsuid = current_fsuid(); int flag = 0; int ispipe = 0; unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; @@ -1723,12 +1721,20 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; + + cred = prepare_creds(); + if (!cred) { + retval = -ENOMEM; + goto fail; + } + down_write(&mm->mmap_sem); /* * If another thread got here first, or we are not dumpable, bail out. */ if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); + put_cred(cred); goto fail; } @@ -1739,12 +1745,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->cred->fsuid = 0; /* Dump root private */ + cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); - if (retval < 0) + if (retval < 0) { + put_cred(cred); goto fail; + } + + old_cred = override_creds(cred); /* * Clear any false indication of pending signals that might @@ -1835,7 +1845,8 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->cred->fsuid = fsuid; + revert_creds(old_cred); + put_cred(cred); coredump_finish(mm); fail: return retval; diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 808fc03a6fbd..836ffa1047d9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,55 +27,67 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { - struct cred *act_as = current->cred ; - struct svc_cred cred = rqstp->rq_cred; + struct group_info *rqgi; + struct group_info *gi; + struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); int ret; + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; + + rqgi = rqstp->rq_cred.cr_group_info; + if (flags & NFSEXP_ALLSQUASH) { - cred.cr_uid = exp->ex_anon_uid; - cred.cr_gid = exp->ex_anon_gid; - cred.cr_group_info = groups_alloc(0); + new->fsuid = exp->ex_anon_uid; + new->fsgid = exp->ex_anon_gid; + gi = groups_alloc(0); } else if (flags & NFSEXP_ROOTSQUASH) { - struct group_info *gi; - if (!cred.cr_uid) - cred.cr_uid = exp->ex_anon_uid; - if (!cred.cr_gid) - cred.cr_gid = exp->ex_anon_gid; - gi = groups_alloc(cred.cr_group_info->ngroups); - if (gi) - for (i = 0; i < cred.cr_group_info->ngroups; i++) { - if (!GROUP_AT(cred.cr_group_info, i)) - GROUP_AT(gi, i) = exp->ex_anon_gid; - else - GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i); - } - cred.cr_group_info = gi; - } else - get_group_info(cred.cr_group_info); - - if (cred.cr_uid != (uid_t) -1) - act_as->fsuid = cred.cr_uid; - else - act_as->fsuid = exp->ex_anon_uid; - if (cred.cr_gid != (gid_t) -1) - act_as->fsgid = cred.cr_gid; - else - act_as->fsgid = exp->ex_anon_gid; + if (!new->fsuid) + new->fsuid = exp->ex_anon_uid; + if (!new->fsgid) + new->fsgid = exp->ex_anon_gid; - if (!cred.cr_group_info) - return -ENOMEM; - ret = set_groups(act_as, cred.cr_group_info); - put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { - act_as->cap_effective = - cap_drop_nfsd_set(act_as->cap_effective); + gi = groups_alloc(rqgi->ngroups); + if (!gi) + goto oom; + + for (i = 0; i < rqgi->ngroups; i++) { + if (!GROUP_AT(rqgi, i)) + GROUP_AT(gi, i) = exp->ex_anon_gid; + else + GROUP_AT(gi, i) = GROUP_AT(rqgi, i); + } } else { - act_as->cap_effective = - cap_raise_nfsd_set(act_as->cap_effective, - act_as->cap_permitted); + gi = get_group_info(rqgi); } + + if (new->fsuid == (uid_t) -1) + new->fsuid = exp->ex_anon_uid; + if (new->fsgid == (gid_t) -1) + new->fsgid = exp->ex_anon_gid; + + ret = set_groups(new, gi); + put_group_info(gi); + if (!ret) + goto error; + + if (new->uid) + new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + else + new->cap_effective = cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + return commit_creds(new); + +oom: + ret = -ENOMEM; +error: + abort_creds(new); return ret; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 632a50b4b371..9371ea12d7fa 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -54,20 +54,26 @@ static struct path rec_dir; static int rec_dir_init = 0; -static void -nfs4_save_user(uid_t *saveuid, gid_t *savegid) +static int +nfs4_save_creds(const struct cred **original_creds) { - *saveuid = current->cred->fsuid; - *savegid = current->cred->fsgid; - current->cred->fsuid = 0; - current->cred->fsgid = 0; + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = 0; + new->fsgid = 0; + *original_creds = override_creds(new); + put_cred(new); + return 0; } static void -nfs4_reset_user(uid_t saveuid, gid_t savegid) +nfs4_reset_creds(const struct cred *original) { - current->cred->fsuid = saveuid; - current->cred->fsgid = savegid; + revert_creds(original); } static void @@ -129,10 +135,9 @@ nfsd4_sync_rec_dir(void) int nfsd4_create_clid_dir(struct nfs4_client *clp) { + const struct cred *original_cred; char *dname = clp->cl_recdir; struct dentry *dentry; - uid_t uid; - gid_t gid; int status; dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); @@ -140,7 +145,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (!rec_dir_init || clp->cl_firststate) return 0; - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; /* lock the parent */ mutex_lock(&rec_dir.dentry->d_inode->i_mutex); @@ -168,7 +175,7 @@ out_unlock: clp->cl_firststate = 1; nfsd4_sync_rec_dir(); } - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); return status; } @@ -211,20 +218,21 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen, static int nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) { + const struct cred *original_cred; struct file *filp; struct dentry_list_arg dla = { .parent = dir, }; struct list_head *dentries = &dla.dentries; struct dentry_list *child; - uid_t uid; - gid_t gid; int status; if (!rec_dir_init) return 0; - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, current_cred()); @@ -250,7 +258,7 @@ out: dput(child->dentry); kfree(child); } - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); return status; } @@ -312,8 +320,7 @@ out: void nfsd4_remove_clid_dir(struct nfs4_client *clp) { - uid_t uid; - gid_t gid; + const struct cred *original_cred; int status; if (!rec_dir_init || !clp->cl_firststate) @@ -323,9 +330,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (status) goto out; clp->cl_firststate = 0; - nfs4_save_user(&uid, &gid); + + status = nfs4_save_creds(&original_cred); + if (status < 0) + goto out; + status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); if (status == 0) nfsd4_sync_rec_dir(); mnt_drop_write(rec_dir.mnt); @@ -402,16 +413,21 @@ nfsd4_recdir_load(void) { void nfsd4_init_recdir(char *rec_dirname) { - uid_t uid = 0; - gid_t gid = 0; - int status; + const struct cred *original_cred; + int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", rec_dirname); BUG_ON(rec_dir_init); - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) { + printk("NFSD: Unable to change credentials to find recovery" + " directory: error %d\n", + status); + return; + } status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &rec_dir); @@ -421,7 +437,7 @@ nfsd4_init_recdir(char *rec_dirname) if (!status) rec_dir_init = 1; - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); } void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e67cfaea0865..f0da7d9c3a92 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. */ - current->cred->cap_effective = - cap_raise_nfsd_set(current->cred->cap_effective, - current->cred->cap_permitted); + struct cred *new = prepare_creds(); + if (!new) + return nfserrno(-ENOMEM); + new->cap_effective = + cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + put_cred(override_creds(new)); + put_cred(new); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index f96eaab280a3..c0a426d5766c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,30 +425,33 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { - struct cred *cred = current->cred; + const struct cred *old_cred; + struct cred *override_cred; struct path path; struct inode *inode; - int old_fsuid, old_fsgid; - kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ int res; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = cred->fsuid; - old_fsgid = cred->fsgid; + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; - cred->fsuid = cred->uid; - cred->fsgid = cred->gid; + override_cred->fsuid = override_cred->uid; + override_cred->fsgid = override_cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->cred->uid) - old_cap = cap_set_effective(__cap_empty_set); + if (override_cred->uid) + cap_clear(override_cred->cap_effective); else - old_cap = cap_set_effective(cred->cap_permitted); + override_cred->cap_effective = + override_cred->cap_permitted; } + old_cred = override_creds(override_cred); + res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (res) goto out; @@ -485,12 +488,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - cred->fsuid = old_fsuid; - cred->fsgid = old_fsgid; - - if (!issecure(SECURE_NO_SETUID_FIXUP)) - cap_set_effective(old_cap); - + revert_creds(old_cred); + put_cred(override_cred); return res; } diff --git a/include/linux/audit.h b/include/linux/audit.h index 6fbebac7b1bf..0b2fcb698a63 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -454,8 +454,10 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); -extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); -extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); +extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old); +extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -522,16 +524,20 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) * * -Eric */ -static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - __audit_log_bprm_fcaps(bprm, pP, pE); + return __audit_log_bprm_fcaps(bprm, new, old); + return 0; } -static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +static inline int audit_log_capset(pid_t pid, const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - return __audit_log_capset(pid, eff, inh, perm); + return __audit_log_capset(pid, new, old); return 0; } @@ -566,8 +572,8 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) -#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) -#define audit_log_capset(pid, e, i, p) ({ 0; }) +#define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) +#define audit_log_capset(pid, ncr, ocr) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/include/linux/capability.h b/include/linux/capability.h index 7f26580a5a4d..e22f48c2a46f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -519,8 +519,6 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); - /** * has_capability - Determine if a task has a superior capability available * @t: The task in question diff --git a/include/linux/cred.h b/include/linux/cred.h index 62b9e532422d..eaf6fa695a04 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,6 +84,8 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; + +extern void release_tgcred(struct cred *cred); #endif /* @@ -137,11 +139,30 @@ struct cred { struct user_struct *user; /* real user ID subscription */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ - spinlock_t lock; /* lock for pointer changes */ }; extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); +extern struct cred *prepare_creds(void); +extern struct cred *prepare_usermodehelper_creds(void); +extern int commit_creds(struct cred *); +extern void abort_creds(struct cred *); +extern const struct cred *override_creds(const struct cred *) __deprecated; +extern void revert_creds(const struct cred *) __deprecated; +extern void __init cred_init(void); + +/** + * get_new_cred - Get a reference on a new set of credentials + * @cred: The new credentials to reference + * + * Get a reference on the specified set of new credentials. The caller must + * release the reference. + */ +static inline struct cred *get_new_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} /** * get_cred - Get a reference on a set of credentials @@ -150,10 +171,9 @@ extern int copy_creds(struct task_struct *, unsigned long); * Get a reference on the specified set of credentials. The caller must * release the reference. */ -static inline struct cred *get_cred(struct cred *cred) +static inline const struct cred *get_cred(const struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred((struct cred *) cred); } /** @@ -166,6 +186,8 @@ static inline struct cred *get_cred(struct cred *cred) static inline void put_cred(const struct cred *_cred) { struct cred *cred = (struct cred *) _cred; + + BUG_ON(atomic_read(&(cred)->usage) <= 0); if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } @@ -250,13 +272,13 @@ static inline void put_cred(const struct cred *_cred) __groups; \ }) -#define task_cred_xxx(task, xxx) \ -({ \ - __typeof__(task->cred->xxx) ___val; \ - rcu_read_lock(); \ - ___val = __task_cred((task))->xxx; \ - rcu_read_unlock(); \ - ___val; \ +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(((struct cred *)NULL)->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ }) #define task_uid(task) (task_cred_xxx((task), uid)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5e24c54b6dfd..08c3b24ad9a8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -150,6 +150,8 @@ extern struct cred init_cred; .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ .cred = &init_cred, \ + .cred_exec_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/key.h b/include/linux/key.h index 0836cc838b0c..69ecf0934b02 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -73,6 +73,7 @@ struct key; struct seq_file; struct user_struct; struct signal_struct; +struct cred; struct key_type; struct key_owner; @@ -181,7 +182,7 @@ struct key { extern struct key *key_alloc(struct key_type *type, const char *desc, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, key_perm_t perm, unsigned long flags); @@ -249,7 +250,7 @@ extern int key_unlink(struct key *keyring, struct key *key); extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, unsigned long flags, struct key *dest); @@ -276,22 +277,12 @@ extern ctl_table key_sysctls[]; /* * the userspace interface */ -extern void switch_uid_keyring(struct user_struct *new_user); -extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern void exit_keys(struct task_struct *tsk); -extern int suid_keys(struct task_struct *tsk); +extern int install_thread_keyring_to_cred(struct cred *cred); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(keyring) \ -({ \ - struct key *old_session = current->cred->tgcred->session_keyring; \ - current->cred->tgcred->session_keyring = keyring; \ - old_session; \ -}) - #else /* CONFIG_KEYS */ #define key_validate(k) 0 @@ -303,11 +294,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(k) ({ NULL; }) -#define copy_keys(f,t) 0 -#define exit_keys(t) do { } while(0) -#define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2913252989b3..121d655e460d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,8 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred *cred; /* actual/objective task credentials */ + const struct cred *cred; /* actual/objective task credentials (COW) */ + struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1720,7 +1721,6 @@ static inline struct user_struct *get_uid(struct user_struct *u) return u; } extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); extern void release_uids(struct user_namespace *ns); #include @@ -1870,6 +1870,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) +extern bool is_single_threaded(struct task_struct *); + /* * Careful: do_each_thread/while_each_thread is a double loop so * 'break' will not work as expected - use goto instead. diff --git a/include/linux/security.h b/include/linux/security.h index 7e9fe046a0d1..68be11251447 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,24 +53,21 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -extern void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +extern int cap_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); -extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); -extern void cap_task_reparent_to_init(struct task_struct *p); +extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); + unsigned long arg4, unsigned long arg5); extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); @@ -170,8 +167,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Compute and set the security attributes of a process being transformed * by an execve operation based on the old attributes (current->security) * and the information saved in @bprm->security by the set_security hook. - * Since this hook function (and its caller) are void, this hook can not - * return an error. However, it can leave the security attributes of the + * Since this function may return an error, in which case the process will + * be killed. However, it can leave the security attributes of the * process unchanged if an access failure occurs at this point. * bprm_apply_creds is called under task_lock. @unsafe indicates various * reasons why it may be unsafe to change security state. @@ -593,15 +590,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @cred_alloc_security: - * @cred contains the cred struct for child process. - * Allocate and attach a security structure to the cred->security field. - * The security field is initialized to NULL when the task structure is - * allocated. - * Return 0 if operation was successful. * @cred_free: * @cred points to the credentials. * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_commit: + * @new points to the new credentials. + * @old points to the original credentials. + * Install a new set of credentials. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -614,15 +614,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @id2 contains a uid. * @flags contains one of the LSM_SETID_* values. * Return 0 if permission is granted. - * @task_post_setuid: + * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter * indicates which of the set*uid system calls invoked this hook. If - * @flags is LSM_SETID_FS, then @old_ruid is the old fs uid and the other - * parameters are not used. - * @old_ruid contains the old real uid (or fs uid if LSM_SETID_FS). - * @old_euid contains the old effective uid (or -1 if LSM_SETID_FS). - * @old_suid contains the old saved uid (or -1 if LSM_SETID_FS). + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaces * @flags contains one of the LSM_SETID_* values. * Return 0 on success. * @task_setgid: @@ -725,13 +723,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @arg3 contains a argument. * @arg4 contains a argument. * @arg5 contains a argument. - * @rc_p contains a pointer to communicate back the forced return code - * Return 0 if permission is granted, and non-zero if the security module - * has taken responsibility (setting *rc_p) for the prctl call. - * @task_reparent_to_init: - * Set the security attributes in @p->security for a kernel thread that - * is being reparented to the init task. - * @p contains the task_struct for the kernel thread. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value. * @task_to_inode: * Set the security attributes for an inode based on an associated task's * security attributes, e.g. for /proc/pid inodes. @@ -1008,7 +1001,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * See whether a specific operational right is granted to a process on a * key. * @key_ref refers to the key (key pointer + possession attribute bit). - * @context points to the process to provide the context against which to + * @cred points to the credentials to provide the context against which to * evaluate the security data on the key. * @perm describes the combination of permissions required of this key. * Return 1 if permission granted, 0 if permission denied and -ve it the @@ -1170,6 +1163,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @child process. * Security modules may also want to perform a process tracing check * during an execve in the set_security or apply_creds hooks of + * tracing check during an execve in the bprm_set_creds hook of * binprm_security_ops if the process is being traced and its security * attributes would be changed by the execve. * @child contains the task_struct structure for the target process. @@ -1193,19 +1187,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if the capability sets were successfully obtained. - * @capset_check: - * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the current process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if permission is granted. - * @capset_set: + * @capset: * Set the @effective, @inheritable, and @permitted capability sets for * the current process. + * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. * @capable: * Check whether the @tsk process has the @cap capability. * @tsk contains the task_struct for the process. @@ -1297,12 +1287,11 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - void (*capset_set) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); + int (*capset) (struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); @@ -1314,7 +1303,7 @@ struct security_operations { int (*bprm_alloc_security) (struct linux_binprm *bprm); void (*bprm_free_security) (struct linux_binprm *bprm); - void (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); + int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); void (*bprm_post_apply_creds) (struct linux_binprm *bprm); int (*bprm_set_security) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); @@ -1405,11 +1394,13 @@ struct security_operations { int (*dentry_open) (struct file *file, const struct cred *cred); int (*task_create) (unsigned long clone_flags); - int (*cred_alloc_security) (struct cred *cred); void (*cred_free) (struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); + void (*cred_commit)(struct cred *new, const struct cred *old); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); - int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , - uid_t old_euid, uid_t old_suid, int flags); + int (*task_fix_setuid) (struct cred *new, const struct cred *old, + int flags); int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); int (*task_getpgid) (struct task_struct *p); @@ -1429,8 +1420,7 @@ struct security_operations { int (*task_wait) (struct task_struct *p); int (*task_prctl) (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p); - void (*task_reparent_to_init) (struct task_struct *p); + unsigned long arg5); void (*task_to_inode) (struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); @@ -1535,10 +1525,10 @@ struct security_operations { /* key management security hooks */ #ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, struct task_struct *tsk, unsigned long flags); + int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); void (*key_free) (struct key *key); int (*key_permission) (key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ @@ -1564,12 +1554,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); @@ -1583,7 +1571,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); int security_bprm_alloc(struct linux_binprm *bprm); void security_bprm_free(struct linux_binprm *bprm); -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); void security_bprm_post_apply_creds(struct linux_binprm *bprm); int security_bprm_set(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); @@ -1660,11 +1648,12 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); -int security_cred_alloc(struct cred *cred); void security_cred_free(struct cred *cred); +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); +void security_commit_creds(struct cred *new, const struct cred *old); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags); +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags); int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); @@ -1683,8 +1672,7 @@ int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid); int security_task_wait(struct task_struct *p); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); -void security_task_reparent_to_init(struct task_struct *p); + unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); @@ -1759,18 +1747,13 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static inline int security_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return cap_capset_check(effective, inheritable, permitted); -} - -static inline void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - cap_capset_set(effective, inheritable, permitted); + return cap_capset(new, old, effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) @@ -1837,9 +1820,9 @@ static inline int security_bprm_alloc(struct linux_binprm *bprm) static inline void security_bprm_free(struct linux_binprm *bprm) { } -static inline void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_apply_creds(bprm, unsafe); } static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -2182,13 +2165,20 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_cred_alloc(struct cred *cred) +static inline void security_cred_free(struct cred *cred) +{ } + +static inline int security_prepare_creds(struct cred *new, + const struct cred *old, + gfp_t gfp) { return 0; } -static inline void security_cred_free(struct cred *cred) -{ } +static inline void security_commit_creds(struct cred *new, + const struct cred *old) +{ +} static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -2196,10 +2186,11 @@ static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, return 0; } -static inline int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +static inline int security_task_fix_setuid(struct cred *new, + const struct cred *old, + int flags) { - return cap_task_post_setuid(old_ruid, old_euid, old_suid, flags); + return cap_task_fix_setuid(new, old, flags); } static inline int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, @@ -2286,14 +2277,9 @@ static inline int security_task_wait(struct task_struct *p) static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p) -{ - return cap_task_prctl(option, arg2, arg3, arg3, arg5, rc_p); -} - -static inline void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg5) { - cap_task_reparent_to_init(p); + return cap_task_prctl(option, arg2, arg3, arg3, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -2719,16 +2705,16 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags); +int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm); + const struct cred *cred, key_perm_t perm); int security_key_getsecurity(struct key *key, char **_buffer); #else static inline int security_key_alloc(struct key *key, - struct task_struct *tsk, + const struct cred *cred, unsigned long flags) { return 0; @@ -2739,7 +2725,7 @@ static inline void security_key_free(struct key *key) } static inline int security_key_permission(key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm) { return 0; diff --git a/init/main.c b/init/main.c index 7e117a231af1..db843bff5732 100644 --- a/init/main.c +++ b/init/main.c @@ -669,6 +669,7 @@ asmlinkage void __init start_kernel(void) efi_enter_virtual_mode(); #endif thread_info_cache_init(); + cred_init(); fork_init(num_physpages); proc_caches_init(); buffer_init(); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ae8ef88ade3f..bc1e2d854bf6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2546,18 +2546,17 @@ int __audit_signal_info(int sig, struct task_struct *t) /** * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps - * @bprm pointer to the bprm being processed - * @caps the caps read from the disk + * @bprm: pointer to the bprm being processed + * @new: the proposed new credentials + * @old: the old credentials * * Simply check if the proc already has the caps given by the file and if not * store the priv escalation info for later auditing at the end of the syscall * - * this can fail and we don't care. See the note in audit.h for - * audit_log_bprm_fcaps() for my explaination.... - * * -Eric */ -void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, const struct cred *old) { struct audit_aux_data_bprm_fcaps *ax; struct audit_context *context = current->audit_context; @@ -2566,7 +2565,7 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) - return; + return -ENOMEM; ax->d.type = AUDIT_BPRM_FCAPS; ax->d.next = context->aux; @@ -2581,26 +2580,27 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; - ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cred->cap_inheritable; - ax->old_pcap.effective = *pE; + ax->old_pcap.permitted = old->cap_permitted; + ax->old_pcap.inheritable = old->cap_inheritable; + ax->old_pcap.effective = old->cap_effective; - ax->new_pcap.permitted = current->cred->cap_permitted; - ax->new_pcap.inheritable = current->cred->cap_inheritable; - ax->new_pcap.effective = current->cred->cap_effective; + ax->new_pcap.permitted = new->cap_permitted; + ax->new_pcap.inheritable = new->cap_inheritable; + ax->new_pcap.effective = new->cap_effective; + return 0; } /** * __audit_log_capset - store information about the arguments to the capset syscall - * @pid target pid of the capset call - * @eff effective cap set - * @inh inheritible cap set - * @perm permited cap set + * @pid: target pid of the capset call + * @new: the new credentials + * @old: the old (current) credentials * * Record the aguments userspace sent to sys_capset for later printing by the * audit system if applicable */ -int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +int __audit_log_capset(pid_t pid, + const struct cred *new, const struct cred *old) { struct audit_aux_data_capset *ax; struct audit_context *context = current->audit_context; @@ -2617,9 +2617,9 @@ int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_c context->aux = (void *)ax; ax->pid = pid; - ax->cap.effective = *eff; - ax->cap.inheritable = *eff; - ax->cap.permitted = *perm; + ax->cap.effective = new->cap_effective; + ax->cap.inheritable = new->cap_effective; + ax->cap.permitted = new->cap_permitted; return 0; } diff --git a/kernel/capability.c b/kernel/capability.c index a404b980b1bd..36b4b4daebec 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -15,12 +15,7 @@ #include #include #include - -/* - * This lock protects task->cap_* for all tasks including current. - * Locking rule: acquire this prior to tasklist_lock. - */ -static DEFINE_SPINLOCK(task_capability_lock); +#include "cred-internals.h" /* * Leveraged for setting/resetting capabilities @@ -128,12 +123,11 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) } /* - * If we have configured with filesystem capability support, then the - * only thing that can change the capabilities of the current process - * is the current process. As such, we can't be in this code at the - * same time as we are in the process of setting capabilities in this - * process. The net result is that we can limit our use of locks to - * when we are reading the caps of another process. + * The only thing that can change the capabilities of the current + * process is the current process. As such, we can't be in this code + * at the same time as we are in the process of setting capabilities + * in this process. The net result is that we can limit our use of + * locks to when we are reading the caps of another process. */ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, kernel_cap_t *pIp, kernel_cap_t *pPp) @@ -143,7 +137,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, if (pid && (pid != task_pid_vnr(current))) { struct task_struct *target; - spin_lock(&task_capability_lock); read_lock(&tasklist_lock); target = find_task_by_vpid(pid); @@ -153,34 +146,12 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, ret = security_capget(target, pEp, pIp, pPp); read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); } else ret = security_capget(current, pEp, pIp, pPp); return ret; } -/* - * Atomically modify the effective capabilities returning the original - * value. No permission check is performed here - it is assumed that the - * caller is permitted to set the desired effective capabilities. - */ -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) -{ - kernel_cap_t pE_old; - - spin_lock(&task_capability_lock); - - pE_old = current->cred->cap_effective; - current->cred->cap_effective = pE_new; - - spin_unlock(&task_capability_lock); - - return pE_old; -} - -EXPORT_SYMBOL(cap_set_effective); - /** * sys_capget - get the capabilities of a given process. * @header: pointer to struct that contains capability version and @@ -208,7 +179,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) return -EINVAL; ret = cap_get_target_pid(pid, &pE, &pI, &pP); - if (!ret) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i; @@ -270,6 +240,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; kernel_cap_t inheritable, permitted, effective; + struct cred *new; int ret; pid_t pid; @@ -284,8 +255,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (pid != 0 && pid != task_pid_vnr(current)) return -EPERM; - if (copy_from_user(&kdata, data, tocopy - * sizeof(struct __user_cap_data_struct))) + if (copy_from_user(&kdata, data, + tocopy * sizeof(struct __user_cap_data_struct))) return -EFAULT; for (i = 0; i < tocopy; i++) { @@ -300,24 +271,23 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) i++; } - ret = audit_log_capset(pid, &effective, &inheritable, &permitted); - if (ret) + new = prepare_creds(); + if (!new) + return -ENOMEM; + + ret = security_capset(new, current_cred(), + &effective, &inheritable, &permitted); + if (ret < 0) + goto error; + + ret = audit_log_capset(pid, new, current_cred()); + if (ret < 0) return ret; - /* This lock is required even when filesystem capability support is - * configured - it protects the sys_capget() call from returning - * incorrect data in the case that the targeted process is not the - * current one. - */ - spin_lock(&task_capability_lock); - - ret = security_capset_check(&effective, &inheritable, &permitted); - /* Having verified that the proposed changes are legal, we now put them - * into effect. - */ - if (!ret) - security_capset_set(&effective, &inheritable, &permitted); - spin_unlock(&task_capability_lock); + return commit_creds(new); + +error: + abort_creds(new); return ret; } diff --git a/kernel/cred-internals.h b/kernel/cred-internals.h new file mode 100644 index 000000000000..2dc4fc2d0bf1 --- /dev/null +++ b/kernel/cred-internals.h @@ -0,0 +1,21 @@ +/* Internal credentials stuff + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +/* + * user.c + */ +static inline void sched_switch_user(struct task_struct *p) +{ +#ifdef CONFIG_USER_SCHED + sched_move_task(p); +#endif /* CONFIG_USER_SCHED */ +} + diff --git a/kernel/cred.c b/kernel/cred.c index ac73e3617684..cb6b5eda978d 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -15,6 +15,10 @@ #include #include #include +#include +#include "cred-internals.h" + +static struct kmem_cache *cred_jar; /* * The common credentials for the initial task's thread group @@ -64,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu) /* * Release a set of thread group credentials. */ -static void release_tgcred(struct cred *cred) +void release_tgcred(struct cred *cred) { #ifdef CONFIG_KEYS struct thread_group_cred *tgcred = cred->tgcred; @@ -81,79 +85,322 @@ static void put_cred_rcu(struct rcu_head *rcu) { struct cred *cred = container_of(rcu, struct cred, rcu); - BUG_ON(atomic_read(&cred->usage) != 0); + if (atomic_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %d\n", + cred, atomic_read(&cred->usage)); + security_cred_free(cred); key_put(cred->thread_keyring); key_put(cred->request_key_auth); release_tgcred(cred); put_group_info(cred->group_info); free_uid(cred->user); - security_cred_free(cred); - kfree(cred); + kmem_cache_free(cred_jar, cred); } /** * __put_cred - Destroy a set of credentials - * @sec: The record to release + * @cred: The record to release * * Destroy a set of credentials on which no references remain. */ void __put_cred(struct cred *cred) { + BUG_ON(atomic_read(&cred->usage) != 0); + call_rcu(&cred->rcu, put_cred_rcu); } EXPORT_SYMBOL(__put_cred); +/** + * prepare_creds - Prepare a new set of credentials for modification + * + * Prepare a new set of task credentials for modification. A task's creds + * shouldn't generally be modified directly, therefore this function is used to + * prepare a new copy, which the caller then modifies and then commits by + * calling commit_creds(). + * + * Returns a pointer to the new creds-to-be if successful, NULL otherwise. + * + * Call commit_creds() or abort_creds() to clean up. + */ +struct cred *prepare_creds(void) +{ + struct task_struct *task = current; + const struct cred *old; + struct cred *new; + + BUG_ON(atomic_read(&task->cred->usage) < 1); + + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + old = task->cred; + memcpy(new, old, sizeof(struct cred)); + + atomic_set(&new->usage, 1); + get_group_info(new->group_info); + get_uid(new->user); + +#ifdef CONFIG_KEYS + key_get(new->thread_keyring); + key_get(new->request_key_auth); + atomic_inc(&new->tgcred->usage); +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + + if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + goto error; + return new; + +error: + abort_creds(new); + return NULL; +} +EXPORT_SYMBOL(prepare_creds); + +/* + * prepare new credentials for the usermode helper dispatcher + */ +struct cred *prepare_usermodehelper_creds(void) +{ +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = NULL; +#endif + struct cred *new; + +#ifdef CONFIG_KEYS + tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC); + if (!tgcred) + return NULL; +#endif + + new = kmem_cache_alloc(cred_jar, GFP_ATOMIC); + if (!new) + return NULL; + + memcpy(new, &init_cred, sizeof(struct cred)); + + atomic_set(&new->usage, 1); + get_group_info(new->group_info); + get_uid(new->user); + +#ifdef CONFIG_KEYS + new->thread_keyring = NULL; + new->request_key_auth = NULL; + new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT; + + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + new->tgcred = tgcred; +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0) + goto error; + + BUG_ON(atomic_read(&new->usage) != 1); + return new; + +error: + put_cred(new); + return NULL; +} + /* * Copy credentials for the new process created by fork() + * + * We share if we can, but under some circumstances we have to generate a new + * set. */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { - struct cred *pcred; - int ret; +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred; +#endif + struct cred *new; + + mutex_init(&p->cred_exec_mutex); - pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL); - if (!pcred) + if ( +#ifdef CONFIG_KEYS + !p->cred->thread_keyring && +#endif + clone_flags & CLONE_THREAD + ) { + get_cred(p->cred); + atomic_inc(&p->cred->user->processes); + return 0; + } + + new = prepare_creds(); + if (!new) return -ENOMEM; #ifdef CONFIG_KEYS - if (clone_flags & CLONE_THREAD) { - atomic_inc(&pcred->tgcred->usage); - } else { - pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL); - if (!pcred->tgcred) { - kfree(pcred); + /* new threads get their own thread keyrings if their parent already + * had one */ + if (new->thread_keyring) { + key_put(new->thread_keyring); + new->thread_keyring = NULL; + if (clone_flags & CLONE_THREAD) + install_thread_keyring_to_cred(new); + } + + /* we share the process and session keyrings between all the threads in + * a process - this is slightly icky as we violate COW credentials a + * bit */ + if (!(clone_flags & CLONE_THREAD)) { + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) { + put_cred(new); return -ENOMEM; } - atomic_set(&pcred->tgcred->usage, 1); - spin_lock_init(&pcred->tgcred->lock); - pcred->tgcred->process_keyring = NULL; - pcred->tgcred->session_keyring = - key_get(p->cred->tgcred->session_keyring); + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + tgcred->process_keyring = NULL; + tgcred->session_keyring = key_get(new->tgcred->session_keyring); + + release_tgcred(new); + new->tgcred = tgcred; } #endif -#ifdef CONFIG_SECURITY - pcred->security = NULL; -#endif + atomic_inc(&new->user->processes); + p->cred = new; + return 0; +} - ret = security_cred_alloc(pcred); - if (ret < 0) { - release_tgcred(pcred); - kfree(pcred); - return ret; +/** + * commit_creds - Install new credentials upon the current task + * @new: The credentials to be assigned + * + * Install a new set of credentials to the current task, using RCU to replace + * the old set. + * + * This function eats the caller's reference to the new credentials. + * + * Always returns 0 thus allowing this function to be tail-called at the end + * of, say, sys_setgid(). + */ +int commit_creds(struct cred *new) +{ + struct task_struct *task = current; + const struct cred *old; + + BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_read(&task->cred->usage) < 1); + + old = task->cred; + security_commit_creds(new, old); + + /* dumpability changes */ + if (old->euid != new->euid || + old->egid != new->egid || + old->fsuid != new->fsuid || + old->fsgid != new->fsgid || + !cap_issubset(new->cap_permitted, old->cap_permitted)) { + set_dumpable(task->mm, suid_dumpable); + task->pdeath_signal = 0; + smp_wmb(); } - atomic_set(&pcred->usage, 1); - get_group_info(pcred->group_info); - get_uid(pcred->user); - key_get(pcred->thread_keyring); - key_get(pcred->request_key_auth); + /* alter the thread keyring */ + if (new->fsuid != old->fsuid) + key_fsuid_changed(task); + if (new->fsgid != old->fsgid) + key_fsgid_changed(task); + + /* do it + * - What if a process setreuid()'s and this brings the + * new uid over his NPROC rlimit? We can check this now + * cheaply with the new uid cache, so if it matters + * we should be checking for it. -DaveM + */ + if (new->user != old->user) + atomic_inc(&new->user->processes); + rcu_assign_pointer(task->cred, new); + if (new->user != old->user) + atomic_dec(&old->user->processes); + + sched_switch_user(task); + + /* send notifications */ + if (new->uid != old->uid || + new->euid != old->euid || + new->suid != old->suid || + new->fsuid != old->fsuid) + proc_id_connector(task, PROC_EVENT_UID); - atomic_inc(&pcred->user->processes); + if (new->gid != old->gid || + new->egid != old->egid || + new->sgid != old->sgid || + new->fsgid != old->fsgid) + proc_id_connector(task, PROC_EVENT_GID); - /* RCU assignment is unneeded here as no-one can have accessed this - * pointer yet, barring us */ - p->cred = pcred; + put_cred(old); return 0; } +EXPORT_SYMBOL(commit_creds); + +/** + * abort_creds - Discard a set of credentials and unlock the current task + * @new: The credentials that were going to be applied + * + * Discard a set of credentials that were under construction and unlock the + * current task. + */ +void abort_creds(struct cred *new) +{ + BUG_ON(atomic_read(&new->usage) < 1); + put_cred(new); +} +EXPORT_SYMBOL(abort_creds); + +/** + * override_creds - Temporarily override the current process's credentials + * @new: The credentials to be assigned + * + * Install a set of temporary override credentials on the current process, + * returning the old set for later reversion. + */ +const struct cred *override_creds(const struct cred *new) +{ + const struct cred *old = current->cred; + + rcu_assign_pointer(current->cred, get_cred(new)); + return old; +} +EXPORT_SYMBOL(override_creds); + +/** + * revert_creds - Revert a temporary credentials override + * @old: The credentials to be restored + * + * Revert a temporary set of override credentials to an old set, discarding the + * override set. + */ +void revert_creds(const struct cred *old) +{ + const struct cred *override = current->cred; + + rcu_assign_pointer(current->cred, old); + put_cred(override); +} +EXPORT_SYMBOL(revert_creds); + +/* + * initialise the credentials stuff + */ +void __init cred_init(void) +{ + /* allocate a slab in which we can store credentials */ + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); +} diff --git a/kernel/exit.c b/kernel/exit.c index bbc22530f2c1..c0711da15486 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,12 +47,14 @@ #include #include #include +#include #include #include #include #include #include +#include "cred-internals.h" static void exit_mm(struct task_struct * tsk); @@ -338,12 +340,12 @@ static void reparent_to_kthreadd(void) /* cpus_allowed? */ /* rt_priority? */ /* signals? */ - security_task_reparent_to_init(current); memcpy(current->signal->rlim, init_task.signal->rlim, sizeof(current->signal->rlim)); - atomic_inc(&(INIT_USER->__count)); + + atomic_inc(&init_cred.usage); + commit_creds(&init_cred); write_unlock_irq(&tasklist_lock); - switch_uid(INIT_USER); } void __set_special_pids(struct pid *pid) @@ -1085,7 +1087,6 @@ NORET_TYPE void do_exit(long code) check_stack_usage(); exit_thread(); cgroup_exit(tsk, 1); - exit_keys(tsk); if (group_dead && tsk->signal->leader) disassociate_ctty(1); diff --git a/kernel/fork.c b/kernel/fork.c index ded1972672a3..82a7948a664e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1084,10 +1084,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; - if ((retval = copy_keys(clone_flags, p))) - goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) - goto bad_fork_cleanup_keys; + goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); @@ -1252,8 +1250,6 @@ bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); -bad_fork_cleanup_keys: - exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); @@ -1281,6 +1277,7 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: + atomic_dec(&p->cred->user->processes); put_cred(p->cred); bad_fork_free: free_task(p); diff --git a/kernel/kmod.c b/kernel/kmod.c index f044f8f57703..b46dbb908669 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -118,10 +118,10 @@ EXPORT_SYMBOL(request_module); struct subprocess_info { struct work_struct work; struct completion *complete; + struct cred *cred; char *path; char **argv; char **envp; - struct key *ring; enum umh_wait wait; int retval; struct file *stdin; @@ -134,19 +134,20 @@ struct subprocess_info { static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; - struct key *new_session, *old_session; int retval; - /* Unblock all signals and set the session keyring. */ - new_session = key_get(sub_info->ring); + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + + /* Unblock all signals */ spin_lock_irq(¤t->sighand->siglock); - old_session = __install_session_keyring(new_session); flush_signal_handlers(current, 1); sigemptyset(¤t->blocked); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - key_put(old_session); + /* Install the credentials */ + commit_creds(sub_info->cred); + sub_info->cred = NULL; /* Install input pipe when needed */ if (sub_info->stdin) { @@ -185,6 +186,8 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info) { if (info->cleanup) (*info->cleanup)(info->argv, info->envp); + if (info->cred) + put_cred(info->cred); kfree(info); } EXPORT_SYMBOL(call_usermodehelper_freeinfo); @@ -240,6 +243,8 @@ static void __call_usermodehelper(struct work_struct *work) pid_t pid; enum umh_wait wait = sub_info->wait; + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. */ @@ -362,6 +367,9 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + sub_info->cred = prepare_usermodehelper_creds(); + if (!sub_info->cred) + return NULL; out: return sub_info; @@ -376,7 +384,13 @@ EXPORT_SYMBOL(call_usermodehelper_setup); void call_usermodehelper_setkeys(struct subprocess_info *info, struct key *session_keyring) { - info->ring = session_keyring; +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = info->cred->tgcred; + key_put(tgcred->session_keyring); + tgcred->session_keyring = key_get(session_keyring); +#else + BUG(); +#endif } EXPORT_SYMBOL(call_usermodehelper_setkeys); @@ -444,6 +458,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, DECLARE_COMPLETION_ONSTACK(done); int retval = 0; + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + helper_lock(); if (sub_info->path[0] == '\0') goto out; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b9d5f4e4f6a4..f764b8806955 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -171,6 +171,14 @@ int ptrace_attach(struct task_struct *task) if (same_thread_group(task, current)) goto out; + /* Protect exec's credential calculations against our interference; + * SUID, SGID and LSM creds get determined differently under ptrace. + */ + retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + if (retval < 0) + goto out; + + retval = -EPERM; repeat: /* * Nasty, nasty. @@ -210,6 +218,7 @@ repeat: bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); + mutex_unlock(¤t->cred_exec_mutex); out: return retval; } diff --git a/kernel/signal.c b/kernel/signal.c index 84989124bafb..2a64304ed54b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -180,7 +180,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask) /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an - * appopriate lock must be held to protect t's user_struct + * appopriate lock must be held to stop the target task from exiting */ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) @@ -194,7 +194,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * caller must be holding the RCU readlock (by way of a spinlock) and * we use RCU protection here */ - user = __task_cred(t)->user; + user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -202,12 +202,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, q = kmem_cache_alloc(sigqueue_cachep, flags); if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = get_uid(user); + q->user = user; } - return(q); + + return q; } static void __sigqueue_free(struct sigqueue *q) diff --git a/kernel/sys.c b/kernel/sys.c index ccc9eb736d35..ab735040468a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -180,7 +180,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = cred->user; + user = (struct user_struct *) cred->user; if (!who) who = cred->uid; else if ((who != cred->uid) && @@ -479,47 +479,48 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - struct cred *cred = current->cred; - int old_rgid = cred->gid; - int old_egid = cred->egid; - int new_rgid = old_rgid; - int new_egid = old_egid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE); if (retval) - return retval; + goto error; + retval = -EPERM; if (rgid != (gid_t) -1) { - if ((old_rgid == rgid) || - (cred->egid == rgid) || + if (old->gid == rgid || + old->egid == rgid || capable(CAP_SETGID)) - new_rgid = rgid; + new->gid = rgid; else - return -EPERM; + goto error; } if (egid != (gid_t) -1) { - if ((old_rgid == egid) || - (cred->egid == egid) || - (cred->sgid == egid) || + if (old->gid == egid || + old->egid == egid || + old->sgid == egid || capable(CAP_SETGID)) - new_egid = egid; + new->egid = egid; else - return -EPERM; - } - if (new_egid != old_egid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + goto error; } + if (rgid != (gid_t) -1 || - (egid != (gid_t) -1 && egid != old_rgid)) - cred->sgid = new_egid; - cred->fsgid = new_egid; - cred->egid = new_egid; - cred->gid = new_rgid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + (egid != (gid_t) -1 && egid != old->gid)) + new->sgid = new->egid; + new->fsgid = new->egid; + + return commit_creds(new); + +error: + abort_creds(new); + return retval; } /* @@ -529,40 +530,42 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - struct cred *cred = current->cred; - int old_egid = cred->egid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); if (retval) - return retval; + goto error; - if (capable(CAP_SETGID)) { - if (old_egid != gid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->gid = cred->egid = cred->sgid = cred->fsgid = gid; - } else if ((gid == cred->gid) || (gid == cred->sgid)) { - if (old_egid != gid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->egid = cred->fsgid = gid; - } + retval = -EPERM; + if (capable(CAP_SETGID)) + new->gid = new->egid = new->sgid = new->fsgid = gid; + else if (gid == old->gid || gid == old->sgid) + new->egid = new->fsgid = gid; else - return -EPERM; + goto error; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + return commit_creds(new); + +error: + abort_creds(new); + return retval; } -static int set_user(uid_t new_ruid, int dumpclear) +/* + * change the user struct in a credentials set to match the new UID + */ +static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); + new_user = alloc_uid(current->nsproxy->user_ns, new->uid); if (!new_user) return -EAGAIN; @@ -573,13 +576,8 @@ static int set_user(uid_t new_ruid, int dumpclear) return -EAGAIN; } - switch_uid(new_user); - - if (dumpclear) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - current->cred->uid = new_ruid; + free_uid(new->user); + new->user = new_user; return 0; } @@ -600,55 +598,56 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { - struct cred *cred = current->cred; - int old_ruid, old_euid, old_suid, new_ruid, new_euid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE); if (retval) - return retval; - - new_ruid = old_ruid = cred->uid; - new_euid = old_euid = cred->euid; - old_suid = cred->suid; + goto error; + retval = -EPERM; if (ruid != (uid_t) -1) { - new_ruid = ruid; - if ((old_ruid != ruid) && - (cred->euid != ruid) && + new->uid = ruid; + if (old->uid != ruid && + old->euid != ruid && !capable(CAP_SETUID)) - return -EPERM; + goto error; } if (euid != (uid_t) -1) { - new_euid = euid; - if ((old_ruid != euid) && - (cred->euid != euid) && - (cred->suid != euid) && + new->euid = euid; + if (old->uid != euid && + old->euid != euid && + old->suid != euid && !capable(CAP_SETUID)) - return -EPERM; + goto error; } - if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) - return -EAGAIN; + retval = -EAGAIN; + if (new->uid != old->uid && set_user(new) < 0) + goto error; - if (new_euid != old_euid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || - (euid != (uid_t) -1 && euid != old_ruid)) - cred->suid = cred->euid; - cred->fsuid = cred->euid; - - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + (euid != (uid_t) -1 && euid != old->uid)) + new->suid = new->euid; + new->fsuid = new->euid; - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); -} + retval = security_task_fix_setuid(new, old, LSM_SETID_RE); + if (retval < 0) + goto error; + return commit_creds(new); +error: + abort_creds(new); + return retval; +} /* * setuid() is implemented like SysV with SAVED_IDS @@ -663,37 +662,41 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - struct cred *cred = current->cred; - int old_euid = cred->euid; - int old_ruid, old_suid, new_suid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); if (retval) - return retval; + goto error; - old_ruid = cred->uid; - old_suid = cred->suid; - new_suid = old_suid; - + retval = -EPERM; if (capable(CAP_SETUID)) { - if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) - return -EAGAIN; - new_suid = uid; - } else if ((uid != cred->uid) && (uid != new_suid)) - return -EPERM; - - if (old_euid != uid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->suid = new->uid = uid; + if (uid != old->uid && set_user(new) < 0) { + retval = -EAGAIN; + goto error; + } + } else if (uid != old->uid && uid != new->suid) { + goto error; } - cred->fsuid = cred->euid = uid; - cred->suid = new_suid; - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + new->fsuid = new->euid = uid; + + retval = security_task_fix_setuid(new, old, LSM_SETID_ID); + if (retval < 0) + goto error; + + return commit_creds(new); - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); +error: + abort_creds(new); + return retval; } @@ -703,47 +706,53 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - struct cred *cred = current->cred; - int old_ruid = cred->uid; - int old_euid = cred->euid; - int old_suid = cred->suid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); if (retval) - return retval; + goto error; + old = current_cred(); + retval = -EPERM; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != cred->uid) && - (ruid != cred->euid) && (ruid != cred->suid)) - return -EPERM; - if ((euid != (uid_t) -1) && (euid != cred->uid) && - (euid != cred->euid) && (euid != cred->suid)) - return -EPERM; - if ((suid != (uid_t) -1) && (suid != cred->uid) && - (suid != cred->euid) && (suid != cred->suid)) - return -EPERM; + if (ruid != (uid_t) -1 && ruid != old->uid && + ruid != old->euid && ruid != old->suid) + goto error; + if (euid != (uid_t) -1 && euid != old->uid && + euid != old->euid && euid != old->suid) + goto error; + if (suid != (uid_t) -1 && suid != old->uid && + suid != old->euid && suid != old->suid) + goto error; } + + retval = -EAGAIN; if (ruid != (uid_t) -1) { - if (ruid != cred->uid && - set_user(ruid, euid != cred->euid) < 0) - return -EAGAIN; + new->uid = ruid; + if (ruid != old->uid && set_user(new) < 0) + goto error; } - if (euid != (uid_t) -1) { - if (euid != cred->euid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->euid = euid; - } - cred->fsuid = cred->euid; + if (euid != (uid_t) -1) + new->euid = euid; if (suid != (uid_t) -1) - cred->suid = suid; + new->suid = suid; + new->fsuid = new->euid; - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + retval = security_task_fix_setuid(new, old, LSM_SETID_RES); + if (retval < 0) + goto error; - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); + return commit_creds(new); + +error: + abort_creds(new); + return retval; } asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) @@ -763,40 +772,45 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { - struct cred *cred = current->cred; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); if (retval) - return retval; + goto error; + retval = -EPERM; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != cred->gid) && - (rgid != cred->egid) && (rgid != cred->sgid)) - return -EPERM; - if ((egid != (gid_t) -1) && (egid != cred->gid) && - (egid != cred->egid) && (egid != cred->sgid)) - return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != cred->gid) && - (sgid != cred->egid) && (sgid != cred->sgid)) - return -EPERM; + if (rgid != (gid_t) -1 && rgid != old->gid && + rgid != old->egid && rgid != old->sgid) + goto error; + if (egid != (gid_t) -1 && egid != old->gid && + egid != old->egid && egid != old->sgid) + goto error; + if (sgid != (gid_t) -1 && sgid != old->gid && + sgid != old->egid && sgid != old->sgid) + goto error; } - if (egid != (gid_t) -1) { - if (egid != cred->egid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->egid = egid; - } - cred->fsgid = cred->egid; + if (rgid != (gid_t) -1) - cred->gid = rgid; + new->gid = rgid; + if (egid != (gid_t) -1) + new->egid = egid; if (sgid != (gid_t) -1) - cred->sgid = sgid; + new->sgid = sgid; + new->fsgid = new->egid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + return commit_creds(new); + +error: + abort_creds(new); + return retval; } asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) @@ -820,28 +834,35 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { - struct cred *cred = current->cred; - int old_fsuid; + const struct cred *old; + struct cred *new; + uid_t old_fsuid; + + new = prepare_creds(); + if (!new) + return current_fsuid(); + old = current_cred(); + old_fsuid = old->fsuid; - old_fsuid = cred->fsuid; - if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) - return old_fsuid; + if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0) + goto error; - if (uid == cred->uid || uid == cred->euid || - uid == cred->suid || uid == cred->fsuid || + if (uid == old->uid || uid == old->euid || + uid == old->suid || uid == old->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->fsuid = uid; + if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) + goto change_okay; } - cred->fsuid = uid; } - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); - - security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); +error: + abort_creds(new); + return old_fsuid; +change_okay: + commit_creds(new); return old_fsuid; } @@ -850,24 +871,34 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long sys_setfsgid(gid_t gid) { - struct cred *cred = current->cred; - int old_fsgid; + const struct cred *old; + struct cred *new; + gid_t old_fsgid; + + new = prepare_creds(); + if (!new) + return current_fsgid(); + old = current_cred(); + old_fsgid = old->fsgid; - old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) - return old_fsgid; + goto error; - if (gid == cred->gid || gid == cred->egid || - gid == cred->sgid || gid == cred->fsgid || + if (gid == old->gid || gid == old->egid || + gid == old->sgid || gid == old->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->fsgid = gid; + goto change_okay; } - cred->fsgid = gid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); } + +error: + abort_creds(new); + return old_fsgid; + +change_okay: + commit_creds(new); return old_fsgid; } @@ -1136,7 +1167,7 @@ EXPORT_SYMBOL(groups_free); /* export the group_info to a user-space array */ static int groups_to_user(gid_t __user *grouplist, - struct group_info *group_info) + const struct group_info *group_info) { int i; unsigned int count = group_info->ngroups; @@ -1227,31 +1258,25 @@ int groups_search(const struct group_info *group_info, gid_t grp) } /** - * set_groups - Change a group subscription in a security record - * @sec: The security record to alter - * @group_info: The group list to impose + * set_groups - Change a group subscription in a set of credentials + * @new: The newly prepared set of credentials to alter + * @group_info: The group list to install * - * Validate a group subscription and, if valid, impose it upon a task security - * record. + * Validate a group subscription and, if valid, insert it into a set + * of credentials. */ -int set_groups(struct cred *cred, struct group_info *group_info) +int set_groups(struct cred *new, struct group_info *group_info) { int retval; - struct group_info *old_info; retval = security_task_setgroups(group_info); if (retval) return retval; + put_group_info(new->group_info); groups_sort(group_info); get_group_info(group_info); - - spin_lock(&cred->lock); - old_info = cred->group_info; - cred->group_info = group_info; - spin_unlock(&cred->lock); - - put_group_info(old_info); + new->group_info = group_info; return 0; } @@ -1266,7 +1291,20 @@ EXPORT_SYMBOL(set_groups); */ int set_current_groups(struct group_info *group_info) { - return set_groups(current->cred, group_info); + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + ret = set_groups(new, group_info); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); } EXPORT_SYMBOL(set_current_groups); @@ -1666,9 +1704,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned char comm[sizeof(me->comm)]; long error; - if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) + error = security_task_prctl(option, arg2, arg3, arg4, arg5); + if (error != -ENOSYS) return error; + error = 0; switch (option) { case PR_SET_PDEATHSIG: if (!valid_signal(arg2)) { diff --git a/kernel/user.c b/kernel/user.c index 104d22ac84d5..d476307dd4b0 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -16,6 +16,7 @@ #include #include #include +#include "cred-internals.h" struct user_namespace init_user_ns = { .kref = { @@ -104,16 +105,10 @@ static int sched_create_user(struct user_struct *up) return rc; } -static void sched_switch_user(struct task_struct *p) -{ - sched_move_task(p); -} - #else /* CONFIG_USER_SCHED */ static void sched_destroy_user(struct user_struct *up) { } static int sched_create_user(struct user_struct *up) { return 0; } -static void sched_switch_user(struct task_struct *p) { } #endif /* CONFIG_USER_SCHED */ @@ -448,36 +443,6 @@ out_unlock: return NULL; } -void switch_uid(struct user_struct *new_user) -{ - struct user_struct *old_user; - - /* What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM - */ - old_user = current->cred->user; - atomic_inc(&new_user->processes); - atomic_dec(&old_user->processes); - switch_uid_keyring(new_user); - current->cred->user = new_user; - sched_switch_user(current); - - /* - * We need to synchronize with __sigqueue_alloc() - * doing a get_uid(p->user).. If that saw the old - * user value, we need to wait until it has exited - * its critical region before we can free the old - * structure. - */ - smp_mb(); - spin_unlock_wait(¤t->sighand->siglock); - - free_uid(old_user); - suid_keys(current); -} - #ifdef CONFIG_USER_NS void release_uids(struct user_namespace *ns) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index f82730adea00..0d9c51d67333 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -19,6 +19,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) { struct user_namespace *ns; struct user_struct *new_user; + struct cred *new; int n; ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); @@ -45,7 +46,16 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) return ERR_PTR(-ENOMEM); } - switch_uid(new_user); + /* Install the new user */ + new = prepare_creds(); + if (!new) { + free_uid(new_user); + free_uid(ns->root_user); + kfree(ns); + } + free_uid(new->user); + new->user = new_user; + commit_creds(new); return ns; } diff --git a/lib/Makefile b/lib/Makefile index 7cb65d85aeb0..80fe8a3ec12a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 9a8ff684da79..ad8c7a782da1 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -287,6 +287,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, time_t expiry, u32 kvno) { + const struct cred *cred = current_cred(); struct key *key; int ret; @@ -297,7 +298,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, _enter(""); - key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0, + key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); @@ -340,10 +341,11 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key); */ struct key *rxrpc_get_null_key(const char *keyname) { + const struct cred *cred = current_cred(); struct key *key; int ret; - key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current, + key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred, KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) return key; diff --git a/security/capability.c b/security/capability.c index fac2f61b69a9..efeb6d9e0e6a 100644 --- a/security/capability.c +++ b/security/capability.c @@ -340,12 +340,16 @@ static int cap_task_create(unsigned long clone_flags) return 0; } -static int cap_cred_alloc_security(struct cred *cred) +static void cap_cred_free(struct cred *cred) +{ +} + +static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { return 0; } -static void cap_cred_free(struct cred *cred) +static void cap_cred_commit(struct cred *new, const struct cred *old) { } @@ -750,7 +754,7 @@ static void cap_release_secctx(char *secdata, u32 seclen) } #ifdef CONFIG_KEYS -static int cap_key_alloc(struct key *key, struct task_struct *ctx, +static int cap_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { return 0; @@ -760,7 +764,7 @@ static void cap_key_free(struct key *key) { } -static int cap_key_permission(key_ref_t key_ref, struct task_struct *context, +static int cap_key_permission(key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { return 0; @@ -814,8 +818,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, ptrace_may_access); set_to_cap_if_null(ops, ptrace_traceme); set_to_cap_if_null(ops, capget); - set_to_cap_if_null(ops, capset_check); - set_to_cap_if_null(ops, capset_set); + set_to_cap_if_null(ops, capset); set_to_cap_if_null(ops, acct); set_to_cap_if_null(ops, capable); set_to_cap_if_null(ops, quotactl); @@ -890,10 +893,11 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_receive); set_to_cap_if_null(ops, dentry_open); set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, cred_alloc_security); set_to_cap_if_null(ops, cred_free); + set_to_cap_if_null(ops, cred_prepare); + set_to_cap_if_null(ops, cred_commit); set_to_cap_if_null(ops, task_setuid); - set_to_cap_if_null(ops, task_post_setuid); + set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); set_to_cap_if_null(ops, task_setpgid); set_to_cap_if_null(ops, task_getpgid); @@ -910,7 +914,6 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, task_wait); set_to_cap_if_null(ops, task_kill); set_to_cap_if_null(ops, task_prctl); - set_to_cap_if_null(ops, task_reparent_to_init); set_to_cap_if_null(ops, task_to_inode); set_to_cap_if_null(ops, ipc_permission); set_to_cap_if_null(ops, ipc_getsecid); diff --git a/security/commoncap.c b/security/commoncap.c index 0384bf95db68..b5419273f92d 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -72,8 +72,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int ret = 0; rcu_read_lock(); - if (!cap_issubset(child->cred->cap_permitted, - current->cred->cap_permitted) && + if (!cap_issubset(__task_cred(child)->cap_permitted, + current_cred()->cap_permitted) && !capable(CAP_SYS_PTRACE)) ret = -EPERM; rcu_read_unlock(); @@ -85,8 +85,8 @@ int cap_ptrace_traceme(struct task_struct *parent) int ret = 0; rcu_read_lock(); - if (!cap_issubset(current->cred->cap_permitted, - parent->cred->cap_permitted) && + if (!cap_issubset(current_cred()->cap_permitted, + __task_cred(parent)->cap_permitted) && !has_capability(parent, CAP_SYS_PTRACE)) ret = -EPERM; rcu_read_unlock(); @@ -117,7 +117,7 @@ static inline int cap_inh_is_capped(void) * to the old permitted set. That is, if the current task * does *not* possess the CAP_SETPCAP capability. */ - return (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0); + return cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0; } static inline int cap_limit_ptraced_target(void) { return 1; } @@ -132,52 +132,39 @@ static inline int cap_limit_ptraced_target(void) #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ -int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +int cap_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - const struct cred *cred = current->cred; - - if (cap_inh_is_capped() - && !cap_issubset(*inheritable, - cap_combine(cred->cap_inheritable, - cred->cap_permitted))) { + if (cap_inh_is_capped() && + !cap_issubset(*inheritable, + cap_combine(old->cap_inheritable, + old->cap_permitted))) /* incapable of using this inheritable set */ return -EPERM; - } + if (!cap_issubset(*inheritable, - cap_combine(cred->cap_inheritable, - cred->cap_bset))) { + cap_combine(old->cap_inheritable, + old->cap_bset))) /* no new pI capabilities outside bounding set */ return -EPERM; - } /* verify restrictions on target's new Permitted set */ - if (!cap_issubset (*permitted, - cap_combine (cred->cap_permitted, - cred->cap_permitted))) { + if (!cap_issubset(*permitted, old->cap_permitted)) return -EPERM; - } /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ - if (!cap_issubset (*effective, *permitted)) { + if (!cap_issubset(*effective, *permitted)) return -EPERM; - } + new->cap_effective = *effective; + new->cap_inheritable = *inheritable; + new->cap_permitted = *permitted; return 0; } -void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - struct cred *cred = current->cred; - - cred->cap_effective = *effective; - cred->cap_inheritable = *inheritable; - cred->cap_permitted = *permitted; -} - static inline void bprm_clear_caps(struct linux_binprm *bprm) { cap_clear(bprm->cap_post_exec_permitted); @@ -382,41 +369,46 @@ int cap_bprm_set_security (struct linux_binprm *bprm) return ret; } -void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) +int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - struct cred *cred = current->cred; + const struct cred *old = current_cred(); + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; - if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || + if (bprm->e_uid != old->uid || bprm->e_gid != old->gid || !cap_issubset(bprm->cap_post_exec_permitted, - cred->cap_permitted)) { + old->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - bprm->e_uid = cred->uid; - bprm->e_gid = cred->gid; + bprm->e_uid = old->uid; + bprm->e_gid = old->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - cred->cap_permitted); + new->cap_permitted); } } } - cred->suid = cred->euid = cred->fsuid = bprm->e_uid; - cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; + new->suid = new->euid = new->fsuid = bprm->e_uid; + new->sgid = new->egid = new->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - cred->cap_permitted = bprm->cap_post_exec_permitted; + new->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - cred->cap_effective = bprm->cap_post_exec_permitted; + new->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(cred->cap_effective); + cap_clear(new->cap_effective); } /* @@ -431,15 +423,15 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. */ - if (!cap_isclear(cred->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || - (bprm->e_uid != 0) || (cred->uid != 0) || + if (!cap_isclear(new->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || + bprm->e_uid != 0 || new->uid != 0 || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &cred->cap_permitted, - &cred->cap_effective); + audit_log_bprm_fcaps(bprm, new, old); } - cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + return commit_creds(new); } int cap_bprm_secureexec (struct linux_binprm *bprm) @@ -514,65 +506,49 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this. */ -static inline void cap_emulate_setxuid (int old_ruid, int old_euid, - int old_suid) +static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) { - struct cred *cred = current->cred; - - if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && + if ((old->uid == 0 || old->euid == 0 || old->suid == 0) && + (new->uid != 0 && new->euid != 0 && new->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear(cred->cap_permitted); - cap_clear(cred->cap_effective); - } - if (old_euid == 0 && cred->euid != 0) { - cap_clear(cred->cap_effective); - } - if (old_euid != 0 && cred->euid == 0) { - cred->cap_effective = cred->cap_permitted; + cap_clear(new->cap_permitted); + cap_clear(new->cap_effective); } + if (old->euid == 0 && new->euid != 0) + cap_clear(new->cap_effective); + if (old->euid != 0 && new->euid == 0) + new->cap_effective = new->cap_permitted; } -int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, - int flags) +int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { - struct cred *cred = current->cred; - switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: case LSM_SETID_RES: /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ - if (!issecure (SECURE_NO_SETUID_FIXUP)) { - cap_emulate_setxuid (old_ruid, old_euid, old_suid); - } + if (!issecure(SECURE_NO_SETUID_FIXUP)) + cap_emulate_setxuid(new, old); break; case LSM_SETID_FS: - { - uid_t old_fsuid = old_ruid; - - /* Copied from kernel/sys.c:setfsuid. */ + /* Copied from kernel/sys.c:setfsuid. */ - /* - * FIXME - is fsuser used for all CAP_FS_MASK capabilities? - * if not, we might be a bit too harsh here. - */ - - if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && cred->fsuid != 0) { - cred->cap_effective = - cap_drop_fs_set( - cred->cap_effective); - } - if (old_fsuid != 0 && cred->fsuid == 0) { - cred->cap_effective = - cap_raise_fs_set( - cred->cap_effective, - cred->cap_permitted); - } + /* + * FIXME - is fsuser used for all CAP_FS_MASK capabilities? + * if not, we might be a bit too harsh here. + */ + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + if (old->fsuid == 0 && new->fsuid != 0) { + new->cap_effective = + cap_drop_fs_set(new->cap_effective); + } + if (old->fsuid != 0 && new->fsuid == 0) { + new->cap_effective = + cap_raise_fs_set(new->cap_effective, + new->cap_permitted); } - break; } + break; default: return -EINVAL; } @@ -628,13 +604,14 @@ int cap_task_setnice (struct task_struct *p, int nice) * this task could get inconsistent info. There can be no * racing writer bc a task can only change its own caps. */ -static long cap_prctl_drop(unsigned long cap) +static long cap_prctl_drop(struct cred *new, unsigned long cap) { if (!capable(CAP_SETPCAP)) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cred->cap_bset, cap); + + cap_lower(new->cap_bset, cap); return 0; } @@ -655,22 +632,29 @@ int cap_task_setnice (struct task_struct *p, int nice) #endif int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p) + unsigned long arg4, unsigned long arg5) { - struct cred *cred = current_cred(); + struct cred *new; long error = 0; + new = prepare_creds(); + if (!new) + return -ENOMEM; + switch (option) { case PR_CAPBSET_READ: + error = -EINVAL; if (!cap_valid(arg2)) - error = -EINVAL; - else - error = !!cap_raised(cred->cap_bset, arg2); - break; + goto error; + error = !!cap_raised(new->cap_bset, arg2); + goto no_change; + #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: - error = cap_prctl_drop(arg2); - break; + error = cap_prctl_drop(new, arg2); + if (error < 0) + goto error; + goto changed; /* * The next four prctl's remain to assist with transitioning a @@ -692,12 +676,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege environment. */ case PR_SET_SECUREBITS: - if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) - & (cred->securebits ^ arg2)) /*[1]*/ - || ((cred->securebits & SECURE_ALL_LOCKS - & ~arg2)) /*[2]*/ - || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ - || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ + error = -EPERM; + if ((((new->securebits & SECURE_ALL_LOCKS) >> 1) + & (new->securebits ^ arg2)) /*[1]*/ + || ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ + || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ + || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks @@ -705,50 +689,51 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * [4] doing anything requires privilege (go read about * the "sendmail capabilities bug") */ - error = -EPERM; /* cannot change a locked bit */ - } else { - cred->securebits = arg2; - } - break; + ) + /* cannot change a locked bit */ + goto error; + new->securebits = arg2; + goto changed; + case PR_GET_SECUREBITS: - error = cred->securebits; - break; + error = new->securebits; + goto no_change; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ case PR_GET_KEEPCAPS: if (issecure(SECURE_KEEP_CAPS)) error = 1; - break; + goto no_change; + case PR_SET_KEEPCAPS: + error = -EINVAL; if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ - error = -EINVAL; - else if (issecure(SECURE_KEEP_CAPS_LOCKED)) - error = -EPERM; - else if (arg2) - cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); + goto error; + error = -EPERM; + if (issecure(SECURE_KEEP_CAPS_LOCKED)) + goto error; + if (arg2) + new->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); - break; + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + goto changed; default: /* No functionality available - continue with default */ - return 0; + error = -ENOSYS; + goto error; } /* Functionality provided */ - *rc_p = error; - return 1; -} - -void cap_task_reparent_to_init (struct task_struct *p) -{ - struct cred *cred = p->cred; - - cap_set_init_eff(cred->cap_effective); - cap_clear(cred->cap_inheritable); - cap_set_full(cred->cap_permitted); - p->cred->securebits = SECUREBITS_DEFAULT; +changed: + return commit_creds(new); + +no_change: + error = 0; +error: + abort_creds(new); + return error; } int cap_syslog (int type) diff --git a/security/keys/internal.h b/security/keys/internal.h index d1586c629788..81932abefe7b 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -12,6 +12,7 @@ #ifndef _INTERNAL_H #define _INTERNAL_H +#include #include static inline __attribute__((format(printf, 1, 2))) @@ -25,7 +26,7 @@ void no_printk(const char *fmt, ...) #define kleave(FMT, ...) \ printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) \ - printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) + printk(KERN_DEBUG " "FMT"\n", ##__VA_ARGS__) #else #define kenter(FMT, ...) \ no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) @@ -97,7 +98,7 @@ extern struct key *keyring_search_instkey(struct key *keyring, typedef int (*key_match_func_t)(const struct key *, const void *); extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct task_struct *tsk, + const struct cred *cred, struct key_type *type, const void *description, key_match_func_t match); @@ -105,13 +106,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, extern key_ref_t search_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, - struct task_struct *tsk); + const struct cred *cred); extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); extern int install_user_keyrings(void); -extern int install_thread_keyring(void); -extern int install_process_keyring(void); +extern int install_thread_keyring_to_cred(struct cred *); +extern int install_process_keyring_to_cred(struct cred *); extern struct key *request_key_and_link(struct key_type *type, const char *description, @@ -130,12 +131,12 @@ extern long join_session_keyring(const char *name); * check to see whether permission is granted to use a key in the desired way */ extern int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - return key_task_permission(key_ref, current, perm); + return key_task_permission(key_ref, current_cred(), perm); } /* required permissions */ @@ -153,7 +154,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) struct request_key_auth { struct key *target_key; struct key *dest_keyring; - struct task_struct *context; + const struct cred *cred; void *callout_info; size_t callout_len; pid_t pid; diff --git a/security/keys/key.c b/security/keys/key.c index a6ca39ed3b0e..f76c8a546fd3 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -218,7 +218,7 @@ serial_exists: * instantiate the key or discard it before returning */ struct key *key_alloc(struct key_type *type, const char *desc, - uid_t uid, gid_t gid, struct task_struct *ctx, + uid_t uid, gid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags) { struct key_user *user = NULL; @@ -294,7 +294,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, #endif /* let the security module know about the key */ - ret = security_key_alloc(key, ctx, flags); + ret = security_key_alloc(key, cred, flags); if (ret < 0) goto security_error; @@ -391,7 +391,7 @@ static int __key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey) + struct key *authkey) { int ret, awaken; @@ -421,8 +421,8 @@ static int __key_instantiate_and_link(struct key *key, ret = __key_link(keyring, key); /* disable the authorisation key */ - if (instkey) - key_revoke(instkey); + if (authkey) + key_revoke(authkey); } } @@ -444,14 +444,14 @@ int key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey) + struct key *authkey) { int ret; if (keyring) down_write(&keyring->sem); - ret = __key_instantiate_and_link(key, data, datalen, keyring, instkey); + ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey); if (keyring) up_write(&keyring->sem); @@ -469,7 +469,7 @@ EXPORT_SYMBOL(key_instantiate_and_link); int key_negate_and_link(struct key *key, unsigned timeout, struct key *keyring, - struct key *instkey) + struct key *authkey) { struct timespec now; int ret, awaken; @@ -504,8 +504,8 @@ int key_negate_and_link(struct key *key, ret = __key_link(keyring, key); /* disable the authorisation key */ - if (instkey) - key_revoke(instkey); + if (authkey) + key_revoke(authkey); } mutex_unlock(&key_construction_mutex); @@ -743,6 +743,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_perm_t perm, unsigned long flags) { + const struct cred *cred = current_cred(); struct key_type *ktype; struct key *keyring, *key = NULL; key_ref_t key_ref; @@ -802,8 +803,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } /* allocate a new key */ - key = key_alloc(ktype, description, current_fsuid(), current_fsgid(), - current, perm, flags); + key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred, + perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error_3; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 8833b447adef..7c72baa02f2e 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -866,6 +866,23 @@ static long get_instantiation_keyring(key_serial_t ringid, return -ENOKEY; } +/* + * change the request_key authorisation key on the current process + */ +static int keyctl_change_reqkey_auth(struct key *key) +{ + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + key_put(new->request_key_auth); + new->request_key_auth = key_get(key); + + return commit_creds(new); +} + /*****************************************************************************/ /* * instantiate the key with the specified payload, and, if one is given, link @@ -876,12 +893,15 @@ long keyctl_instantiate_key(key_serial_t id, size_t plen, key_serial_t ringid) { + const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; void *payload; long ret; bool vm = false; + kenter("%d,,%zu,%d", id, plen, ringid); + ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; @@ -889,7 +909,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->cred->request_key_auth; + instkey = cred->request_key_auth; if (!instkey) goto error; @@ -931,10 +951,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ - if (ret == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - } + if (ret == 0) + keyctl_change_reqkey_auth(NULL); error2: if (!vm) @@ -953,14 +971,17 @@ error: */ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { + const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; long ret; + kenter("%d,%u,%d", id, timeout, ringid); + /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->cred->request_key_auth; + instkey = cred->request_key_auth; if (!instkey) goto error; @@ -982,10 +1003,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ - if (ret == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - } + if (ret == 0) + keyctl_change_reqkey_auth(NULL); error: return ret; @@ -999,36 +1018,56 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { - struct cred *cred = current->cred; - int ret; + struct cred *new; + int ret, old_setting; + + old_setting = current_cred_xxx(jit_keyring); + + if (reqkey_defl == KEY_REQKEY_DEFL_NO_CHANGE) + return old_setting; + + new = prepare_creds(); + if (!new) + return -ENOMEM; switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: - ret = install_thread_keyring(); + ret = install_thread_keyring_to_cred(new); if (ret < 0) - return ret; + goto error; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - ret = install_process_keyring(); - if (ret < 0) - return ret; + ret = install_process_keyring_to_cred(new); + if (ret < 0) { + if (ret != -EEXIST) + goto error; + ret = 0; + } + goto set; case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_SESSION_KEYRING: case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - set: - cred->jit_keyring = reqkey_defl; + case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: + goto set; case KEY_REQKEY_DEFL_NO_CHANGE: - return cred->jit_keyring; - case KEY_REQKEY_DEFL_GROUP_KEYRING: default: - return -EINVAL; + ret = -EINVAL; + goto error; } +set: + new->jit_keyring = reqkey_defl; + commit_creds(new); + return old_setting; +error: + abort_creds(new); + return -EINVAL; + } /* end keyctl_set_reqkey_keyring() */ /*****************************************************************************/ @@ -1087,9 +1126,7 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - ret = 0; + ret = keyctl_change_reqkey_auth(NULL); goto error; } @@ -1104,10 +1141,12 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = authkey; - ret = authkey->serial; + ret = keyctl_change_reqkey_auth(authkey); + if (ret < 0) + goto error; + key_put(authkey); + ret = authkey->serial; error: return ret; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index fdf75f901991..ed851574d073 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -245,14 +245,14 @@ static long keyring_read(const struct key *keyring, * allocate a keyring and link into the destination keyring */ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, unsigned long flags, + const struct cred *cred, unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, ctx, + uid, gid, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, flags); @@ -281,7 +281,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * - we propagate the possession attribute from the keyring ref to the key ref */ key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct task_struct *context, + const struct cred *cred, struct key_type *type, const void *description, key_match_func_t match) @@ -304,7 +304,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, key_check(keyring); /* top keyring must have search permission to begin the search */ - err = key_task_permission(keyring_ref, context, KEY_SEARCH); + err = key_task_permission(keyring_ref, cred, KEY_SEARCH); if (err < 0) { key_ref = ERR_PTR(err); goto error; @@ -377,7 +377,7 @@ descend: /* key must have search permissions */ if (key_task_permission(make_key_ref(key, possessed), - context, KEY_SEARCH) < 0) + cred, KEY_SEARCH) < 0) continue; /* we set a different error code if we pass a negative key */ @@ -404,7 +404,7 @@ ascend: continue; if (key_task_permission(make_key_ref(key, possessed), - context, KEY_SEARCH) < 0) + cred, KEY_SEARCH) < 0) continue; /* stack the current position */ @@ -459,7 +459,7 @@ key_ref_t keyring_search(key_ref_t keyring, if (!type->match) return ERR_PTR(-ENOKEY); - return keyring_search_aux(keyring, current, + return keyring_search_aux(keyring, current->cred, type, description, type->match); } /* end keyring_search() */ diff --git a/security/keys/permission.c b/security/keys/permission.c index 13c36164f284..5d9fc7b93f2e 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -14,24 +14,27 @@ #include "internal.h" /*****************************************************************************/ -/* - * check to see whether permission is granted to use a key in the desired way, - * but permit the security modules to override +/** + * key_task_permission - Check a key can be used + * @key_ref: The key to check + * @cred: The credentials to use + * @perm: The permissions to check for + * + * Check to see whether permission is granted to use a key in the desired way, + * but permit the security modules to override. + * + * The caller must hold either a ref on cred or must hold the RCU readlock or a + * spinlock. */ -int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, +int key_task_permission(const key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { - const struct cred *cred; struct key *key; key_perm_t kperm; int ret; key = key_ref_to_ptr(key_ref); - rcu_read_lock(); - cred = __task_cred(context); - /* use the second 8-bits of permissions for keys the caller owns */ if (key->uid == cred->fsuid) { kperm = key->perm >> 16; @@ -57,7 +60,6 @@ int key_task_permission(const key_ref_t key_ref, kperm = key->perm; use_these_perms: - rcu_read_lock(); /* use the top 8-bits of permissions for keys the caller possesses * - possessor permissions are additive with other permissions @@ -71,7 +73,7 @@ use_these_perms: return -EACCES; /* let LSM be the final arbiter */ - return security_key_permission(key_ref, context, perm); + return security_key_permission(key_ref, cred, perm); } /* end key_task_permission() */ diff --git a/security/keys/proc.c b/security/keys/proc.c index f619170da760..7f508def50e3 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -136,8 +136,12 @@ static int proc_keys_show(struct seq_file *m, void *v) int rc; /* check whether the current task is allowed to view the key (assuming - * non-possession) */ - rc = key_task_permission(make_key_ref(key, 0), current, KEY_VIEW); + * non-possession) + * - the caller holds a spinlock, and thus the RCU read lock, making our + * access to __current_cred() safe + */ + rc = key_task_permission(make_key_ref(key, 0), current_cred(), + KEY_VIEW); if (rc < 0) return 0; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 70ee93406f30..df329f684a65 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,11 +42,15 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->cred->user; + struct user_struct *user; + const struct cred *cred; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; + cred = current_cred(); + user = cred->user; + kenter("%p{%u}", user, user->uid); if (user->uid_keyring) { @@ -67,7 +71,7 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - current, KEY_ALLOC_IN_QUOTA, + cred, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); @@ -83,8 +87,7 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - current, KEY_ALLOC_IN_QUOTA, - NULL); + cred, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -116,142 +119,128 @@ error: return ret; } -/*****************************************************************************/ /* - * deal with the UID changing + * install a fresh thread keyring directly to new credentials */ -void switch_uid_keyring(struct user_struct *new_user) +int install_thread_keyring_to_cred(struct cred *new) { -#if 0 /* do nothing for now */ - struct key *old; - - /* switch to the new user's session keyring if we were running under - * root's default session keyring */ - if (new_user->uid != 0 && - current->session_keyring == &root_session_keyring - ) { - atomic_inc(&new_user->session_keyring->usage); - - task_lock(current); - old = current->session_keyring; - current->session_keyring = new_user->session_keyring; - task_unlock(current); + struct key *keyring; - key_put(old); - } -#endif + keyring = keyring_alloc("_tid", new->uid, new->gid, new, + KEY_ALLOC_QUOTA_OVERRUN, NULL); + if (IS_ERR(keyring)) + return PTR_ERR(keyring); -} /* end switch_uid_keyring() */ + new->thread_keyring = keyring; + return 0; +} -/*****************************************************************************/ /* * install a fresh thread keyring, discarding the old one */ -int install_thread_keyring(void) +static int install_thread_keyring(void) { - struct task_struct *tsk = current; - struct key *keyring, *old; - char buf[20]; + struct cred *new; int ret; - sprintf(buf, "_tid.%u", tsk->pid); + new = prepare_creds(); + if (!new) + return -ENOMEM; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - KEY_ALLOC_QUOTA_OVERRUN, NULL); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); - goto error; + BUG_ON(new->thread_keyring); + + ret = install_thread_keyring_to_cred(new); + if (ret < 0) { + abort_creds(new); + return ret; } - task_lock(tsk); - old = tsk->cred->thread_keyring; - tsk->cred->thread_keyring = keyring; - task_unlock(tsk); + return commit_creds(new); +} - ret = 0; +/* + * install a process keyring directly to a credentials struct + * - returns -EEXIST if there was already a process keyring, 0 if one installed, + * and other -ve on any other error + */ +int install_process_keyring_to_cred(struct cred *new) +{ + struct key *keyring; + int ret; - key_put(old); -error: + if (new->tgcred->process_keyring) + return -EEXIST; + + keyring = keyring_alloc("_pid", new->uid, new->gid, + new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + if (IS_ERR(keyring)) + return PTR_ERR(keyring); + + spin_lock_irq(&new->tgcred->lock); + if (!new->tgcred->process_keyring) { + new->tgcred->process_keyring = keyring; + keyring = NULL; + ret = 0; + } else { + ret = -EEXIST; + } + spin_unlock_irq(&new->tgcred->lock); + key_put(keyring); return ret; +} -} /* end install_thread_keyring() */ - -/*****************************************************************************/ /* * make sure a process keyring is installed + * - we */ -int install_process_keyring(void) +static int install_process_keyring(void) { - struct task_struct *tsk = current; - struct key *keyring; - char buf[20]; + struct cred *new; int ret; - might_sleep(); - - if (!tsk->cred->tgcred->process_keyring) { - sprintf(buf, "_pid.%u", tsk->tgid); - - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - KEY_ALLOC_QUOTA_OVERRUN, NULL); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); - goto error; - } - - /* attach keyring */ - spin_lock_irq(&tsk->cred->tgcred->lock); - if (!tsk->cred->tgcred->process_keyring) { - tsk->cred->tgcred->process_keyring = keyring; - keyring = NULL; - } - spin_unlock_irq(&tsk->cred->tgcred->lock); + new = prepare_creds(); + if (!new) + return -ENOMEM; - key_put(keyring); + ret = install_process_keyring_to_cred(new); + if (ret < 0) { + abort_creds(new); + return ret != -EEXIST ?: 0; } - ret = 0; -error: - return ret; - -} /* end install_process_keyring() */ + return commit_creds(new); +} -/*****************************************************************************/ /* - * install a session keyring, discarding the old one - * - if a keyring is not supplied, an empty one is invented + * install a session keyring directly to a credentials struct */ -static int install_session_keyring(struct key *keyring) +static int install_session_keyring_to_cred(struct cred *cred, + struct key *keyring) { - struct task_struct *tsk = current; unsigned long flags; struct key *old; - char buf[20]; might_sleep(); /* create an empty session keyring */ if (!keyring) { - sprintf(buf, "_ses.%u", tsk->tgid); - flags = KEY_ALLOC_QUOTA_OVERRUN; - if (tsk->cred->tgcred->session_keyring) + if (cred->tgcred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, - tsk, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, + cred, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); - } - else { + } else { atomic_inc(&keyring->usage); } /* install the keyring */ - spin_lock_irq(&tsk->cred->tgcred->lock); - old = tsk->cred->tgcred->session_keyring; - rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&tsk->cred->tgcred->lock); + spin_lock_irq(&cred->tgcred->lock); + old = cred->tgcred->session_keyring; + rcu_assign_pointer(cred->tgcred->session_keyring, keyring); + spin_unlock_irq(&cred->tgcred->lock); /* we're using RCU on the pointer, but there's no point synchronising * on it if it didn't previously point to anything */ @@ -261,38 +250,29 @@ static int install_session_keyring(struct key *keyring) } return 0; +} -} /* end install_session_keyring() */ - -/*****************************************************************************/ /* - * copy the keys for fork + * install a session keyring, discarding the old one + * - if a keyring is not supplied, an empty one is invented */ -int copy_keys(unsigned long clone_flags, struct task_struct *tsk) +static int install_session_keyring(struct key *keyring) { - key_check(tsk->cred->thread_keyring); - key_check(tsk->cred->request_key_auth); - - /* no thread keyring yet */ - tsk->cred->thread_keyring = NULL; - - /* copy the request_key() authorisation for this thread */ - key_get(tsk->cred->request_key_auth); - - return 0; + struct cred *new; + int ret; -} /* end copy_keys() */ + new = prepare_creds(); + if (!new) + return -ENOMEM; -/*****************************************************************************/ -/* - * dispose of per-thread keys upon thread exit - */ -void exit_keys(struct task_struct *tsk) -{ - key_put(tsk->cred->thread_keyring); - key_put(tsk->cred->request_key_auth); + ret = install_session_keyring_to_cred(new, NULL); + if (ret < 0) { + abort_creds(new); + return ret; + } -} /* end exit_keys() */ + return commit_creds(new); +} /*****************************************************************************/ /* @@ -300,38 +280,41 @@ void exit_keys(struct task_struct *tsk) */ int exec_keys(struct task_struct *tsk) { - struct key *old; + struct thread_group_cred *tgcred = NULL; + struct cred *new; - /* newly exec'd tasks don't get a thread keyring */ - task_lock(tsk); - old = tsk->cred->thread_keyring; - tsk->cred->thread_keyring = NULL; - task_unlock(tsk); +#ifdef CONFIG_KEYS + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) + return -ENOMEM; +#endif - key_put(old); + new = prepare_creds(); + if (new < 0) + return -ENOMEM; - /* discard the process keyring from a newly exec'd task */ - spin_lock_irq(&tsk->cred->tgcred->lock); - old = tsk->cred->tgcred->process_keyring; - tsk->cred->tgcred->process_keyring = NULL; - spin_unlock_irq(&tsk->cred->tgcred->lock); + /* newly exec'd tasks don't get a thread keyring */ + key_put(new->thread_keyring); + new->thread_keyring = NULL; - key_put(old); + /* create a new per-thread-group creds for all this set of threads to + * share */ + memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - return 0; + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); -} /* end exec_keys() */ + /* inherit the session keyring; new process keyring */ + key_get(tgcred->session_keyring); + tgcred->process_keyring = NULL; -/*****************************************************************************/ -/* - * deal with SUID programs - * - we might want to make this invent a new session keyring - */ -int suid_keys(struct task_struct *tsk) -{ + release_tgcred(new); + new->tgcred = tgcred; + + commit_creds(new); return 0; -} /* end suid_keys() */ +} /* end exec_keys() */ /*****************************************************************************/ /* @@ -376,16 +359,13 @@ void key_fsgid_changed(struct task_struct *tsk) key_ref_t search_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, - struct task_struct *context) + const struct cred *cred) { struct request_key_auth *rka; - struct cred *cred; key_ref_t key_ref, ret, err; might_sleep(); - cred = get_task_cred(context); - /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if @@ -401,7 +381,7 @@ key_ref_t search_process_keyrings(struct key_type *type, if (cred->thread_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->thread_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -422,7 +402,7 @@ key_ref_t search_process_keyrings(struct key_type *type, if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->tgcred->process_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -446,7 +426,7 @@ key_ref_t search_process_keyrings(struct key_type *type, make_key_ref(rcu_dereference( cred->tgcred->session_keyring), 1), - context, type, description, match); + cred, type, description, match); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -468,7 +448,7 @@ key_ref_t search_process_keyrings(struct key_type *type, else if (cred->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->user->session_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -490,7 +470,7 @@ key_ref_t search_process_keyrings(struct key_type *type, * - we don't permit access to request_key auth keys via this method */ if (cred->request_key_auth && - context == current && + cred == current_cred() && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ @@ -500,7 +480,7 @@ key_ref_t search_process_keyrings(struct key_type *type, rka = cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, - match, rka->context); + match, rka->cred); up_read(&cred->request_key_auth->sem); @@ -527,7 +507,6 @@ key_ref_t search_process_keyrings(struct key_type *type, key_ref = ret ? ret : err; found: - put_cred(cred); return key_ref; } /* end search_process_keyrings() */ @@ -552,8 +531,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key_perm_t perm) { struct request_key_auth *rka; - struct task_struct *t = current; - struct cred *cred; + const struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; @@ -608,6 +586,7 @@ try_again: goto error; ret = install_session_keyring( cred->user->session_keyring); + if (ret < 0) goto error; goto reget_creds; @@ -693,7 +672,7 @@ try_again: /* check to see if we possess the key */ skey_ref = search_process_keyrings(key->type, key, lookup_user_key_possessed, - current); + cred); if (!IS_ERR(skey_ref)) { key_put(key); @@ -725,7 +704,7 @@ try_again: goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, t, perm); + ret = key_task_permission(key_ref, cred, perm); if (ret < 0) goto invalid_key; @@ -755,21 +734,33 @@ reget_creds: */ long join_session_keyring(const char *name) { - struct task_struct *tsk = current; - struct cred *cred = current->cred; + const struct cred *old; + struct cred *new; struct key *keyring; - long ret; + long ret, serial; + + /* only permit this if there's a single thread in the thread group - + * this avoids us having to adjust the creds on all threads and risking + * ENOMEM */ + if (!is_single_threaded(current)) + return -EMLINK; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); /* if no name is provided, install an anonymous keyring */ if (!name) { - ret = install_session_keyring(NULL); + ret = install_session_keyring_to_cred(new, NULL); if (ret < 0) goto error; - rcu_read_lock(); - ret = rcu_dereference(cred->tgcred->session_keyring)->serial; - rcu_read_unlock(); - goto error; + serial = new->tgcred->session_keyring->serial; + ret = commit_creds(new); + if (ret == 0) + ret = serial; + goto okay; } /* allow the user to join or create a named keyring */ @@ -779,29 +770,33 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, cred->uid, cred->gid, tsk, + keyring = keyring_alloc(name, old->uid, old->gid, old, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } - } - else if (IS_ERR(keyring)) { + } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } /* we've got a keyring - now to install it */ - ret = install_session_keyring(keyring); + ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) goto error2; + commit_creds(new); + mutex_unlock(&key_session_mutex); + ret = keyring->serial; key_put(keyring); +okay: + return ret; error2: mutex_unlock(&key_session_mutex); error: + abort_creds(new); return ret; - -} /* end join_session_keyring() */ +} diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3d12558362df..0e04f72ef2d4 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -83,8 +83,10 @@ static int call_sbin_request_key(struct key_construction *cons, /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); - keyring = keyring_alloc(desc, current_fsuid(), current_fsgid(), current, + cred = get_current_cred(); + keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, KEY_ALLOC_QUOTA_OVERRUN, NULL); + put_cred(cred); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error_alloc; @@ -104,8 +106,7 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - cred->thread_keyring ? - cred->thread_keyring->serial : 0); + cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; if (cred->tgcred->process_keyring) @@ -155,8 +156,8 @@ error_link: key_put(keyring); error_alloc: - kleave(" = %d", ret); complete_request_key(cons, ret); + kleave(" = %d", ret); return ret; } @@ -295,6 +296,7 @@ static int construct_alloc_key(struct key_type *type, struct key_user *user, struct key **_key) { + const struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref; @@ -302,9 +304,8 @@ static int construct_alloc_key(struct key_type *type, mutex_lock(&user->cons_lock); - key = key_alloc(type, description, - current_fsuid(), current_fsgid(), current, KEY_POS_ALL, - flags); + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL, flags); if (IS_ERR(key)) goto alloc_failed; @@ -317,8 +318,7 @@ static int construct_alloc_key(struct key_type *type, * waited for locks */ mutex_lock(&key_construction_mutex); - key_ref = search_process_keyrings(type, description, type->match, - current); + key_ref = search_process_keyrings(type, description, type->match, cred); if (!IS_ERR(key_ref)) goto key_already_present; @@ -363,6 +363,8 @@ static struct key *construct_key_and_link(struct key_type *type, struct key *key; int ret; + kenter(""); + user = key_user_lookup(current_fsuid()); if (!user) return ERR_PTR(-ENOMEM); @@ -376,17 +378,21 @@ static struct key *construct_key_and_link(struct key_type *type, if (ret == 0) { ret = construct_key(key, callout_info, callout_len, aux, dest_keyring); - if (ret < 0) + if (ret < 0) { + kdebug("cons failed"); goto construction_failed; + } } key_put(dest_keyring); + kleave(" = key %d", key_serial(key)); return key; construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); key_put(dest_keyring); + kleave(" = %d", ret); return ERR_PTR(ret); } @@ -405,6 +411,7 @@ struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags) { + const struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref; @@ -414,7 +421,7 @@ struct key *request_key_and_link(struct key_type *type, /* search all the process keyrings for a key */ key_ref = search_process_keyrings(type, description, type->match, - current); + cred); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 2125579d5d73..86747151ee5b 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -105,9 +105,9 @@ static void request_key_auth_revoke(struct key *key) kenter("{%d}", key->serial); - if (rka->context) { - put_task_struct(rka->context); - rka->context = NULL; + if (rka->cred) { + put_cred(rka->cred); + rka->cred = NULL; } } /* end request_key_auth_revoke() */ @@ -122,9 +122,9 @@ static void request_key_auth_destroy(struct key *key) kenter("{%d}", key->serial); - if (rka->context) { - put_task_struct(rka->context); - rka->context = NULL; + if (rka->cred) { + put_cred(rka->cred); + rka->cred = NULL; } key_put(rka->target_key); @@ -143,6 +143,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; + const struct cred *cred = current->cred; struct key *authkey = NULL; char desc[20]; int ret; @@ -164,28 +165,25 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->cred->request_key_auth) { + if (cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(¤t->cred->request_key_auth->sem); + down_read(&cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ - if (test_bit(KEY_FLAG_REVOKED, - ¤t->cred->request_key_auth->flags)) + if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) goto auth_key_revoked; - irka = current->cred->request_key_auth->payload.data; - rka->context = irka->context; + irka = cred->request_key_auth->payload.data; + rka->cred = get_cred(irka->cred); rka->pid = irka->pid; - get_task_struct(rka->context); - up_read(¤t->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ - rka->context = current; + rka->cred = get_cred(cred); rka->pid = current->pid; - get_task_struct(rka->context); } rka->target_key = key_get(target); @@ -197,7 +195,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, sprintf(desc, "%x", target->serial); authkey = key_alloc(&key_type_request_key_auth, desc, - current_fsuid(), current_fsgid(), current, + cred->fsuid, cred->fsgid, cred, KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(authkey)) { @@ -205,16 +203,16 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, goto error_alloc; } - /* construct and attach to the keyring */ + /* construct the auth key */ ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); if (ret < 0) goto error_inst; - kleave(" = {%d}", authkey->serial); + kleave(" = {%d,%d}", authkey->serial, atomic_read(&authkey->usage)); return authkey; auth_key_revoked: - up_read(¤t->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); @@ -257,6 +255,7 @@ static int key_get_instantiation_authkey_match(const struct key *key, */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { + const struct cred *cred = current_cred(); struct key *authkey; key_ref_t authkey_ref; @@ -264,7 +263,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) &key_type_request_key_auth, (void *) (unsigned long) target_id, key_get_instantiation_authkey_match, - current); + cred); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); diff --git a/security/security.c b/security/security.c index f40a0a04c3c2..a55d739c6864 100644 --- a/security/security.c +++ b/security/security.c @@ -145,18 +145,13 @@ int security_capget(struct task_struct *target, return security_ops->capget(target, effective, inheritable, permitted); } -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return security_ops->capset_check(effective, inheritable, permitted); -} - -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - security_ops->capset_set(effective, inheritable, permitted); + return security_ops->capset(new, old, + effective, inheritable, permitted); } int security_capable(struct task_struct *tsk, int cap) @@ -228,9 +223,9 @@ void security_bprm_free(struct linux_binprm *bprm) security_ops->bprm_free_security(bprm); } -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - security_ops->bprm_apply_creds(bprm, unsafe); + return security_ops->bprm_apply_creds(bprm, unsafe); } void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -616,14 +611,19 @@ int security_task_create(unsigned long clone_flags) return security_ops->task_create(clone_flags); } -int security_cred_alloc(struct cred *cred) +void security_cred_free(struct cred *cred) { - return security_ops->cred_alloc_security(cred); + security_ops->cred_free(cred); } -void security_cred_free(struct cred *cred) +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { - security_ops->cred_free(cred); + return security_ops->cred_prepare(new, old, gfp); +} + +void security_commit_creds(struct cred *new, const struct cred *old) +{ + return security_ops->cred_commit(new, old); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -631,10 +631,10 @@ int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) return security_ops->task_setuid(id0, id1, id2, flags); } -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags) { - return security_ops->task_post_setuid(old_ruid, old_euid, old_suid, flags); + return security_ops->task_fix_setuid(new, old, flags); } int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) @@ -716,14 +716,9 @@ int security_task_wait(struct task_struct *p) } int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p) -{ - return security_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p); -} - -void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg4, unsigned long arg5) { - security_ops->task_reparent_to_init(p); + return security_ops->task_prctl(option, arg2, arg3, arg4, arg5); } void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -1123,9 +1118,10 @@ EXPORT_SYMBOL(security_skb_classify_flow); #ifdef CONFIG_KEYS -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) +int security_key_alloc(struct key *key, const struct cred *cred, + unsigned long flags) { - return security_ops->key_alloc(key, tsk, flags); + return security_ops->key_alloc(key, cred, flags); } void security_key_free(struct key *key) @@ -1134,9 +1130,9 @@ void security_key_free(struct key *key) } int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm) + const struct cred *cred, key_perm_t perm) { - return security_ops->key_permission(key_ref, context, perm); + return security_ops->key_permission(key_ref, cred, perm); } int security_key_getsecurity(struct key *key, char **_buffer) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f20cbd681ba6..c71bba78872f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -156,20 +156,20 @@ static int selinux_secmark_enabled(void) return (atomic_read(&selinux_secmark_refcount) > 0); } -/* Allocate and free functions for each kind of security blob. */ - -static int cred_alloc_security(struct cred *cred) +/* + * initialise the security for the init task + */ +static void cred_init_security(void) { + struct cred *cred = (struct cred *) current->cred; struct task_security_struct *tsec; tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); if (!tsec) - return -ENOMEM; + panic("SELinux: Failed to initialize initial task.\n"); - tsec->osid = tsec->sid = SECINITSID_UNLABELED; + tsec->osid = tsec->sid = SECINITSID_KERNEL; cred->security = tsec; - - return 0; } /* @@ -1378,6 +1378,19 @@ static inline u32 signal_to_av(int sig) return perm; } +/* + * Check permission between a pair of credentials + * fork check, ptrace check, etc. + */ +static int cred_has_perm(const struct cred *actor, + const struct cred *target, + u32 perms) +{ + u32 asid = cred_sid(actor), tsid = cred_sid(target); + + return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL); +} + /* * Check permission between a pair of tasks, e.g. signal checks, * fork check, ptrace check, etc. @@ -1820,24 +1833,19 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, return secondary_ops->capget(target, effective, inheritable, permitted); } -static int selinux_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static int selinux_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { int error; - error = secondary_ops->capset_check(effective, inheritable, permitted); + error = secondary_ops->capset(new, old, + effective, inheritable, permitted); if (error) return error; - return task_has_perm(current, current, PROCESS__SETCAP); -} - -static void selinux_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - secondary_ops->capset_set(effective, inheritable, permitted); + return cred_has_perm(old, new, PROCESS__SETCAP); } static int selinux_capable(struct task_struct *tsk, int cap, int audit) @@ -2244,16 +2252,23 @@ static inline void flush_unauthorized_files(const struct cred *cred, spin_unlock(&files->file_lock); } -static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { struct task_security_struct *tsec; struct bprm_security_struct *bsec; + struct cred *new; u32 sid; int rc; - secondary_ops->bprm_apply_creds(bprm, unsafe); + rc = secondary_ops->bprm_apply_creds(bprm, unsafe); + if (rc < 0) + return rc; - tsec = current_security(); + new = prepare_creds(); + if (!new) + return -ENOMEM; + + tsec = new->security; bsec = bprm->security; sid = bsec->sid; @@ -2268,7 +2283,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) PROCESS__SHARE, NULL); if (rc) { bsec->unsafe = 1; - return; + goto out; } } @@ -2292,12 +2307,16 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) PROCESS__PTRACE, NULL); if (rc) { bsec->unsafe = 1; - return; + goto out; } } } tsec->sid = sid; } + +out: + commit_creds(new); + return 0; } /* @@ -3021,6 +3040,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, static int file_map_prot_check(struct file *file, unsigned long prot, int shared) { const struct cred *cred = current_cred(); + int rc = 0; #ifndef CONFIG_PPC32 if ((prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { @@ -3029,9 +3049,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared * private file mapping that will also be writable. * This has an additional check. */ - int rc = task_has_perm(current, current, PROCESS__EXECMEM); + rc = cred_has_perm(cred, cred, PROCESS__EXECMEM); if (rc) - return rc; + goto error; } #endif @@ -3048,7 +3068,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared return file_has_perm(cred, file, av); } - return 0; + +error: + return rc; } static int selinux_file_mmap(struct file *file, unsigned long reqprot, @@ -3090,8 +3112,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, rc = 0; if (vma->vm_start >= vma->vm_mm->start_brk && vma->vm_end <= vma->vm_mm->brk) { - rc = task_has_perm(current, current, - PROCESS__EXECHEAP); + rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP); } else if (!vma->vm_file && vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { @@ -3104,8 +3125,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, * modified content. This typically should only * occur for text relocations. */ - rc = file_has_perm(cred, vma->vm_file, - FILE__EXECMOD); + rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); } if (rc) return rc; @@ -3211,6 +3231,7 @@ static int selinux_dentry_open(struct file *file, const struct cred *cred) struct file_security_struct *fsec; struct inode *inode; struct inode_security_struct *isec; + inode = file->f_path.dentry->d_inode; fsec = file->f_security; isec = inode->i_security; @@ -3247,38 +3268,41 @@ static int selinux_task_create(unsigned long clone_flags) return task_has_perm(current, current, PROCESS__FORK); } -static int selinux_cred_alloc_security(struct cred *cred) +/* + * detach and free the LSM part of a set of credentials + */ +static void selinux_cred_free(struct cred *cred) { - struct task_security_struct *tsec1, *tsec2; - int rc; - - tsec1 = current_security(); + struct task_security_struct *tsec = cred->security; + cred->security = NULL; + kfree(tsec); +} - rc = cred_alloc_security(cred); - if (rc) - return rc; - tsec2 = cred->security; +/* + * prepare a new set of credentials for modification + */ +static int selinux_cred_prepare(struct cred *new, const struct cred *old, + gfp_t gfp) +{ + const struct task_security_struct *old_tsec; + struct task_security_struct *tsec; - tsec2->osid = tsec1->osid; - tsec2->sid = tsec1->sid; + old_tsec = old->security; - /* Retain the exec, fs, key, and sock SIDs across fork */ - tsec2->exec_sid = tsec1->exec_sid; - tsec2->create_sid = tsec1->create_sid; - tsec2->keycreate_sid = tsec1->keycreate_sid; - tsec2->sockcreate_sid = tsec1->sockcreate_sid; + tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); + if (!tsec) + return -ENOMEM; + new->security = tsec; return 0; } /* - * detach and free the LSM part of a set of credentials + * commit new credentials */ -static void selinux_cred_free(struct cred *cred) +static void selinux_cred_commit(struct cred *new, const struct cred *old) { - struct task_security_struct *tsec = cred->security; - cred->security = NULL; - kfree(tsec); + secondary_ops->cred_commit(new, old); } static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -3292,9 +3316,10 @@ static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) return 0; } -static int selinux_task_post_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) +static int selinux_task_fix_setuid(struct cred *new, const struct cred *old, + int flags) { - return secondary_ops->task_post_setuid(id0, id1, id2, flags); + return secondary_ops->task_fix_setuid(new, old, flags); } static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) @@ -3368,7 +3393,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim /* Control the ability to change the hard limit (whether lowering or raising it), so that the hard limit can later be used as a safe reset point for the soft limit - upon context transitions. See selinux_bprm_apply_creds. */ + upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) return task_has_perm(current, current, PROCESS__SETRLIMIT); @@ -3422,13 +3447,12 @@ static int selinux_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, - long *rc_p) + unsigned long arg5) { /* The current prctl operations do not appear to require any SELinux controls since they merely observe or modify the state of the current process. */ - return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p); + return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5); } static int selinux_task_wait(struct task_struct *p) @@ -3436,18 +3460,6 @@ static int selinux_task_wait(struct task_struct *p) return task_has_perm(p, current, PROCESS__SIGCHLD); } -static void selinux_task_reparent_to_init(struct task_struct *p) -{ - struct task_security_struct *tsec; - - secondary_ops->task_reparent_to_init(p); - - tsec = p->cred->security; - tsec->osid = tsec->sid; - tsec->sid = SECINITSID_KERNEL; - return; -} - static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { @@ -5325,7 +5337,8 @@ static int selinux_setprocattr(struct task_struct *p, { struct task_security_struct *tsec; struct task_struct *tracer; - u32 sid = 0; + struct cred *new; + u32 sid = 0, ptsid; int error; char *str = value; @@ -5372,86 +5385,75 @@ static int selinux_setprocattr(struct task_struct *p, return error; } + new = prepare_creds(); + if (!new) + return -ENOMEM; + /* Permission checking based on the specified context is performed during the actual operation (execve, open/mkdir/...), when we know the full context of the - operation. See selinux_bprm_set_security for the execve + operation. See selinux_bprm_set_creds for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. */ - tsec = p->cred->security; - if (!strcmp(name, "exec")) + tsec = new->security; + if (!strcmp(name, "exec")) { tsec->exec_sid = sid; - else if (!strcmp(name, "fscreate")) + } else if (!strcmp(name, "fscreate")) { tsec->create_sid = sid; - else if (!strcmp(name, "keycreate")) { + } else if (!strcmp(name, "keycreate")) { error = may_create_key(sid, p); if (error) - return error; + goto abort_change; tsec->keycreate_sid = sid; - } else if (!strcmp(name, "sockcreate")) + } else if (!strcmp(name, "sockcreate")) { tsec->sockcreate_sid = sid; - else if (!strcmp(name, "current")) { - struct av_decision avd; - + } else if (!strcmp(name, "current")) { + error = -EINVAL; if (sid == 0) - return -EINVAL; - /* - * SELinux allows to change context in the following case only. - * - Single threaded processes. - * - Multi threaded processes intend to change its context into - * more restricted domain (defined by TYPEBOUNDS statement). - */ - if (atomic_read(&p->mm->mm_users) != 1) { - struct task_struct *g, *t; - struct mm_struct *mm = p->mm; - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (t->mm == mm && t != p) { - read_unlock(&tasklist_lock); - error = security_bounded_transition(tsec->sid, sid); - if (!error) - goto boundary_ok; - - return error; - } - } while_each_thread(g, t); - read_unlock(&tasklist_lock); + goto abort_change; + + /* Only allow single threaded processes to change context */ + error = -EPERM; + if (!is_single_threaded(p)) { + error = security_bounded_transition(tsec->sid, sid); + if (error) + goto abort_change; } -boundary_ok: /* Check permissions for the transition. */ error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, PROCESS__DYNTRANSITION, NULL); if (error) - return error; + goto abort_change; /* Check for ptracing, and update the task SID if ok. Otherwise, leave SID unchanged and fail. */ + ptsid = 0; task_lock(p); - rcu_read_lock(); tracer = tracehook_tracer_task(p); - if (tracer != NULL) { - u32 ptsid = task_sid(tracer); - rcu_read_unlock(); - error = avc_has_perm_noaudit(ptsid, sid, - SECCLASS_PROCESS, - PROCESS__PTRACE, 0, &avd); - if (!error) - tsec->sid = sid; - task_unlock(p); - avc_audit(ptsid, sid, SECCLASS_PROCESS, - PROCESS__PTRACE, &avd, error, NULL); + if (tracer) + ptsid = task_sid(tracer); + task_unlock(p); + + if (tracer) { + error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, + PROCESS__PTRACE, NULL); if (error) - return error; - } else { - rcu_read_unlock(); - tsec->sid = sid; - task_unlock(p); + goto abort_change; } - } else - return -EINVAL; + tsec->sid = sid; + } else { + error = -EINVAL; + goto abort_change; + } + + commit_creds(new); return size; + +abort_change: + abort_creds(new); + return error; } static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) @@ -5471,23 +5473,21 @@ static void selinux_release_secctx(char *secdata, u32 seclen) #ifdef CONFIG_KEYS -static int selinux_key_alloc(struct key *k, struct task_struct *tsk, +static int selinux_key_alloc(struct key *k, const struct cred *cred, unsigned long flags) { - const struct task_security_struct *__tsec; + const struct task_security_struct *tsec; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); if (!ksec) return -ENOMEM; - rcu_read_lock(); - __tsec = __task_cred(tsk)->security; - if (__tsec->keycreate_sid) - ksec->sid = __tsec->keycreate_sid; + tsec = cred->security; + if (tsec->keycreate_sid) + ksec->sid = tsec->keycreate_sid; else - ksec->sid = __tsec->sid; - rcu_read_unlock(); + ksec->sid = tsec->sid; k->security = ksec; return 0; @@ -5502,8 +5502,8 @@ static void selinux_key_free(struct key *k) } static int selinux_key_permission(key_ref_t key_ref, - struct task_struct *ctx, - key_perm_t perm) + const struct cred *cred, + key_perm_t perm) { struct key *key; struct key_security_struct *ksec; @@ -5515,7 +5515,7 @@ static int selinux_key_permission(key_ref_t key_ref, if (perm == 0) return 0; - sid = task_sid(ctx); + sid = cred_sid(cred); key = key_ref_to_ptr(key_ref); ksec = key->security; @@ -5545,8 +5545,7 @@ static struct security_operations selinux_ops = { .ptrace_may_access = selinux_ptrace_may_access, .ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, - .capset_check = selinux_capset_check, - .capset_set = selinux_capset_set, + .capset = selinux_capset, .sysctl = selinux_sysctl, .capable = selinux_capable, .quotactl = selinux_quotactl, @@ -5621,10 +5620,11 @@ static struct security_operations selinux_ops = { .dentry_open = selinux_dentry_open, .task_create = selinux_task_create, - .cred_alloc_security = selinux_cred_alloc_security, .cred_free = selinux_cred_free, + .cred_prepare = selinux_cred_prepare, + .cred_commit = selinux_cred_commit, .task_setuid = selinux_task_setuid, - .task_post_setuid = selinux_task_post_setuid, + .task_fix_setuid = selinux_task_fix_setuid, .task_setgid = selinux_task_setgid, .task_setpgid = selinux_task_setpgid, .task_getpgid = selinux_task_getpgid, @@ -5641,7 +5641,6 @@ static struct security_operations selinux_ops = { .task_kill = selinux_task_kill, .task_wait = selinux_task_wait, .task_prctl = selinux_task_prctl, - .task_reparent_to_init = selinux_task_reparent_to_init, .task_to_inode = selinux_task_to_inode, .ipc_permission = selinux_ipc_permission, @@ -5737,8 +5736,6 @@ static struct security_operations selinux_ops = { static __init int selinux_init(void) { - struct task_security_struct *tsec; - if (!security_module_enable(&selinux_ops)) { selinux_enabled = 0; return 0; @@ -5752,10 +5749,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); /* Set the security state for the initial task. */ - if (cred_alloc_security(current->cred)) - panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->cred->security; - tsec->osid = tsec->sid = SECINITSID_KERNEL; + cred_init_security(); sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 11167fd567b9..e952b397153d 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -104,8 +104,7 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->cred->security, ctp->cred->security, - MAY_READWRITE); + rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -127,8 +126,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->cred->security, current->cred->security, - MAY_READWRITE); + rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -976,22 +974,6 @@ static int smack_file_receive(struct file *file) * Task hooks */ -/** - * smack_cred_alloc_security - "allocate" a task cred blob - * @cred: the task creds in need of a blob - * - * Smack isn't using copies of blobs. Everyone - * points to an immutable list. No alloc required. - * No data copy required. - * - * Always returns 0 - */ -static int smack_cred_alloc_security(struct cred *cred) -{ - cred->security = current_security(); - return 0; -} - /** * smack_cred_free - "free" task-level security credentials * @cred: the credentials in question @@ -1005,6 +987,30 @@ static void smack_cred_free(struct cred *cred) cred->security = NULL; } +/** + * smack_cred_prepare - prepare new set of credentials for modification + * @new: the new credentials + * @old: the original credentials + * @gfp: the atomicity of any memory allocations + * + * Prepare a new set of credentials for modification. + */ +static int smack_cred_prepare(struct cred *new, const struct cred *old, + gfp_t gfp) +{ + new->security = old->security; + return 0; +} + +/* + * commit new credentials + * @new: the new credentials + * @old: the original credentials + */ +static void smack_cred_commit(struct cred *new, const struct cred *old) +{ +} + /** * smack_task_setpgid - Smack check on setting pgid * @p: the task object @@ -2036,6 +2042,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) static int smack_setprocattr(struct task_struct *p, char *name, void *value, size_t size) { + struct cred *new; char *newsmack; /* @@ -2058,7 +2065,11 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->cred->security = newsmack; + new = prepare_creds(); + if (!new) + return -ENOMEM; + new->security = newsmack; + commit_creds(new); return size; } @@ -2354,17 +2365,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, /** * smack_key_alloc - Set the key security blob * @key: object - * @tsk: the task associated with the key + * @cred: the credentials to use * @flags: unused * * No allocation required * * Returns 0 */ -static int smack_key_alloc(struct key *key, struct task_struct *tsk, +static int smack_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { - key->security = tsk->cred->security; + key->security = cred->security; return 0; } @@ -2382,14 +2393,14 @@ static void smack_key_free(struct key *key) /* * smack_key_permission - Smack access on a key * @key_ref: gets to the object - * @context: task involved + * @cred: the credentials to use * @perm: unused * * Return 0 if the task has read and write to the object, * an error code otherwise */ static int smack_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm) + const struct cred *cred, key_perm_t perm) { struct key *keyp; @@ -2405,11 +2416,10 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->cred->security == NULL) + if (cred->security == NULL) return -EACCES; - return smk_access(context->cred->security, keyp->security, - MAY_READWRITE); + return smk_access(cred->security, keyp->security, MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2580,8 +2590,7 @@ struct security_operations smack_ops = { .ptrace_may_access = smack_ptrace_may_access, .ptrace_traceme = smack_ptrace_traceme, .capget = cap_capget, - .capset_check = cap_capset_check, - .capset_set = cap_capset_set, + .capset = cap_capset, .capable = cap_capable, .syslog = smack_syslog, .settime = cap_settime, @@ -2630,9 +2639,10 @@ struct security_operations smack_ops = { .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, - .cred_alloc_security = smack_cred_alloc_security, .cred_free = smack_cred_free, - .task_post_setuid = cap_task_post_setuid, + .cred_prepare = smack_cred_prepare, + .cred_commit = smack_cred_commit, + .task_fix_setuid = cap_task_fix_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, .task_getsid = smack_task_getsid, @@ -2645,7 +2655,6 @@ struct security_operations smack_ops = { .task_movememory = smack_task_movememory, .task_kill = smack_task_kill, .task_wait = smack_task_wait, - .task_reparent_to_init = cap_task_reparent_to_init, .task_to_inode = smack_task_to_inode, .task_prctl = cap_task_prctl, @@ -2721,6 +2730,8 @@ struct security_operations smack_ops = { */ static __init int smack_init(void) { + struct cred *cred; + if (!security_module_enable(&smack_ops)) return 0; @@ -2729,7 +2740,8 @@ static __init int smack_init(void) /* * Set the security state for the initial task. */ - current->cred->security = &smack_known_floor.smk_known; + cred = (struct cred *) current->cred; + cred->security = &smack_known_floor.smk_known; /* * Initialize locks -- cgit v1.2.3 From a6f76f23d297f70e2a6b3ec607f7aeeea9e37e8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:24 +1100 Subject: CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/x86/ia32/ia32_aout.c | 2 +- fs/binfmt_aout.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/binfmt_flat.c | 2 +- fs/binfmt_som.c | 2 +- fs/compat.c | 42 +++--- fs/exec.c | 149 +++++++++++--------- fs/internal.h | 6 + include/linux/audit.h | 16 --- include/linux/binfmts.h | 16 ++- include/linux/cred.h | 3 +- include/linux/key.h | 2 - include/linux/security.h | 103 +++++--------- kernel/cred.c | 46 ++++++- security/capability.c | 19 +-- security/commoncap.c | 152 ++++++++++---------- security/keys/process_keys.c | 42 ------ security/root_plug.c | 13 +- security/security.c | 26 ++-- security/selinux/hooks.c | 283 ++++++++++++++++---------------------- security/selinux/include/objsec.h | 11 -- security/smack/smack_lsm.c | 3 +- 23 files changed, 429 insertions(+), 515 deletions(-) (limited to 'kernel') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 127ec3f07214..2a4d073d2cf1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -327,7 +327,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->cached_hole_size = 0; current->mm->mmap = NULL; - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 204cfd1d7676..f1f3f4192a60 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -320,7 +320,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->free_area_cache = current->mm->mmap_base; current->mm->cached_hole_size = 0; - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; #ifdef __sparc__ if (N_MAGIC(ex) == NMAGIC) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9142ff5dc8e6..f458c1217c5e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -956,7 +956,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 45dabd59936f..aa5b43205e37 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -404,7 +404,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, current->mm->start_stack = current->mm->start_brk + stack_size; #endif - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index ccb781a6a804..7bbd5c6b3725 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -880,7 +880,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) (libinfo.lib_list[j].loaded)? libinfo.lib_list[j].start_data:UNLOADED_LIB; - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; set_binfmt(&flat_format); diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 74e587a52796..08644a61616e 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -255,7 +255,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) kfree(hpuxhdr); set_binfmt(&som_format); - compute_creds(bprm); + install_exec_creds(bprm); setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); create_som_tables(bprm); diff --git a/fs/compat.c b/fs/compat.c index e5f49f538502..d1ece79b6411 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1393,10 +1393,20 @@ int compat_do_execve(char * filename, if (!bprm) goto out_ret; + retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + if (retval < 0) + goto out_free; + + retval = -ENOMEM; + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; + check_unsafe_exec(bprm); + file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_kfree; + goto out_unlock; sched_exec(); @@ -1410,14 +1420,10 @@ int compat_do_execve(char * filename, bprm->argc = compat_count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) - goto out_mm; + goto out; bprm->envc = compat_count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(bprm); - if (retval) goto out; retval = prepare_binprm(bprm); @@ -1438,19 +1444,16 @@ int compat_do_execve(char * filename, goto out; retval = search_binary_handler(bprm, regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(bprm); - acct_update_integrals(current); - free_bprm(bprm); - return retval; - } + if (retval < 0) + goto out; -out: - if (bprm->security) - security_bprm_free(bprm); + /* execve succeeded */ + mutex_unlock(¤t->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); + return retval; -out_mm: +out: if (bprm->mm) mmput(bprm->mm); @@ -1460,7 +1463,10 @@ out_file: fput(bprm->file); } -out_kfree: +out_unlock: + mutex_unlock(¤t->cred_exec_mutex); + +out_free: free_bprm(bprm); out_ret: diff --git a/fs/exec.c b/fs/exec.c index 9bd3559ddece..32f13e299417 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include #include #include +#include "internal.h" #ifdef __alpha__ /* for /sbin/loader handling in search_binary_handler() */ @@ -1007,15 +1008,17 @@ int flush_old_exec(struct linux_binprm * bprm) */ current->mm->task_size = TASK_SIZE; - if (bprm->e_uid != current_euid() || - bprm->e_gid != current_egid()) { - set_dumpable(current->mm, suid_dumpable); + /* install the new credentials */ + if (bprm->cred->uid != current_euid() || + bprm->cred->gid != current_egid()) { current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || - (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { + bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) { set_dumpable(current->mm, suid_dumpable); } + current->personality &= ~bprm->per_clear; + /* An exec changes our domain. We are no longer part of the thread group */ @@ -1032,13 +1035,50 @@ out: EXPORT_SYMBOL(flush_old_exec); +/* + * install the new credentials for this executable + */ +void install_exec_creds(struct linux_binprm *bprm) +{ + security_bprm_committing_creds(bprm); + + commit_creds(bprm->cred); + bprm->cred = NULL; + + /* cred_exec_mutex must be held at least to this point to prevent + * ptrace_attach() from altering our determination of the task's + * credentials; any time after this it may be unlocked */ + + security_bprm_committed_creds(bprm); +} +EXPORT_SYMBOL(install_exec_creds); + +/* + * determine how safe it is to execute the proposed program + * - the caller must hold current->cred_exec_mutex to protect against + * PTRACE_ATTACH + */ +void check_unsafe_exec(struct linux_binprm *bprm) +{ + struct task_struct *p = current; + + bprm->unsafe = tracehook_unsafe_exec(p); + + if (atomic_read(&p->fs->count) > 1 || + atomic_read(&p->files->count) > 1 || + atomic_read(&p->sighand->count) > 1) + bprm->unsafe |= LSM_UNSAFE_SHARE; +} + /* * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes + * + * This may be called multiple times for binary chains (scripts for example). */ int prepare_binprm(struct linux_binprm *bprm) { - int mode; + umode_t mode; struct inode * inode = bprm->file->f_path.dentry->d_inode; int retval; @@ -1046,14 +1086,15 @@ int prepare_binprm(struct linux_binprm *bprm) if (bprm->file->f_op == NULL) return -EACCES; - bprm->e_uid = current_euid(); - bprm->e_gid = current_egid(); + /* clear any previous set[ug]id data from a previous binary */ + bprm->cred->euid = current_euid(); + bprm->cred->egid = current_egid(); - if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { + if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { /* Set-uid? */ if (mode & S_ISUID) { - current->personality &= ~PER_CLEAR_ON_SETID; - bprm->e_uid = inode->i_uid; + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->euid = inode->i_uid; } /* Set-gid? */ @@ -1063,50 +1104,23 @@ int prepare_binprm(struct linux_binprm *bprm) * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - current->personality &= ~PER_CLEAR_ON_SETID; - bprm->e_gid = inode->i_gid; + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->egid = inode->i_gid; } } /* fill in binprm security blob */ - retval = security_bprm_set(bprm); + retval = security_bprm_set_creds(bprm); if (retval) return retval; + bprm->cred_prepared = 1; - memset(bprm->buf,0,BINPRM_BUF_SIZE); - return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE); + memset(bprm->buf, 0, BINPRM_BUF_SIZE); + return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); } EXPORT_SYMBOL(prepare_binprm); -static int unsafe_exec(struct task_struct *p) -{ - int unsafe = tracehook_unsafe_exec(p); - - if (atomic_read(&p->fs->count) > 1 || - atomic_read(&p->files->count) > 1 || - atomic_read(&p->sighand->count) > 1) - unsafe |= LSM_UNSAFE_SHARE; - - return unsafe; -} - -void compute_creds(struct linux_binprm *bprm) -{ - int unsafe; - - if (bprm->e_uid != current_uid()) - current->pdeath_signal = 0; - exec_keys(current); - - task_lock(current); - unsafe = unsafe_exec(current); - security_bprm_apply_creds(bprm, unsafe); - task_unlock(current); - security_bprm_post_apply_creds(bprm); -} -EXPORT_SYMBOL(compute_creds); - /* * Arguments are '\0' separated strings found at the location bprm->p * points to; chop off the first by relocating brpm->p to right after @@ -1259,6 +1273,8 @@ EXPORT_SYMBOL(search_binary_handler); void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); + if (bprm->cred) + abort_creds(bprm->cred); kfree(bprm); } @@ -1284,10 +1300,20 @@ int do_execve(char * filename, if (!bprm) goto out_files; + retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + if (retval < 0) + goto out_free; + + retval = -ENOMEM; + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; + check_unsafe_exec(bprm); + file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_kfree; + goto out_unlock; sched_exec(); @@ -1301,14 +1327,10 @@ int do_execve(char * filename, bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) - goto out_mm; + goto out; bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(bprm); - if (retval) goto out; retval = prepare_binprm(bprm); @@ -1330,21 +1352,18 @@ int do_execve(char * filename, current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(bprm); - acct_update_integrals(current); - free_bprm(bprm); - if (displaced) - put_files_struct(displaced); - return retval; - } + if (retval < 0) + goto out; -out: - if (bprm->security) - security_bprm_free(bprm); + /* execve succeeded */ + mutex_unlock(¤t->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); + if (displaced) + put_files_struct(displaced); + return retval; -out_mm: +out: if (bprm->mm) mmput (bprm->mm); @@ -1353,7 +1372,11 @@ out_file: allow_write_access(bprm->file); fput(bprm->file); } -out_kfree: + +out_unlock: + mutex_unlock(¤t->cred_exec_mutex); + +out_free: free_bprm(bprm); out_files: diff --git a/fs/internal.h b/fs/internal.h index 80aa9a023372..53af885f1732 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -10,6 +10,7 @@ */ struct super_block; +struct linux_binprm; /* * block_dev.c @@ -39,6 +40,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) */ extern void __init chrdev_init(void); +/* + * exec.c + */ +extern void check_unsafe_exec(struct linux_binprm *); + /* * namespace.c */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 0b2fcb698a63..e8ce2c4c7ac7 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -508,22 +508,6 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) return 0; } -/* - * ieieeeeee, an audit function without a return code! - * - * This function might fail! I decided that it didn't matter. We are too late - * to fail the syscall and the information isn't REQUIRED for any purpose. It's - * just nice to have. We should be able to look at past audit logs to figure - * out this process's current cap set along with the fcaps from the PATH record - * and use that to come up with the final set. Yeah, its ugly, but all the info - * is still in the audit log. So I'm not going to bother mentioning we failed - * if we couldn't allocate memory. - * - * If someone changes their mind they could create the aux record earlier and - * then search here and use that earlier allocation. But I don't wanna. - * - * -Eric - */ static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7394b5b349ff..6cbfbe297180 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -35,16 +35,20 @@ struct linux_binprm{ struct mm_struct *mm; unsigned long p; /* current top of mem */ unsigned int sh_bang:1, - misc_bang:1; + misc_bang:1, + cred_prepared:1,/* true if creds already prepared (multiple + * preps happen for interpreters) */ + cap_effective:1;/* true if has elevated effective capabilities, + * false if not; except for init which inherits + * its parent's caps anyway */ #ifdef __alpha__ unsigned int taso:1; #endif unsigned int recursion_depth; struct file * file; - int e_uid, e_gid; - kernel_cap_t cap_post_exec_permitted; - bool cap_effective; - void *security; + struct cred *cred; /* new credentials */ + int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ + unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; char * filename; /* Name of binary as seen by procps */ char * interp; /* Name of the binary really executed. Most @@ -101,7 +105,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); -extern void compute_creds(struct linux_binprm *binprm); +extern void install_exec_creds(struct linux_binprm *bprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); extern int set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); diff --git a/include/linux/cred.h b/include/linux/cred.h index eaf6fa695a04..8edb4d1d5427 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,8 +84,6 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; - -extern void release_tgcred(struct cred *cred); #endif /* @@ -144,6 +142,7 @@ struct cred { extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); extern struct cred *prepare_creds(void); +extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); diff --git a/include/linux/key.h b/include/linux/key.h index 69ecf0934b02..21d32a142c00 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,7 +278,6 @@ extern ctl_table key_sysctls[]; * the userspace interface */ extern int install_thread_keyring_to_cred(struct cred *cred); -extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); @@ -294,7 +293,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) #define key_init() do { } while(0) diff --git a/include/linux/security.h b/include/linux/security.h index 68be11251447..56a0eed65673 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -57,8 +57,7 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_set_creds(struct linux_binprm *bprm); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); @@ -110,7 +109,7 @@ extern unsigned long mmap_min_addr; struct sched_param; struct request_sock; -/* bprm_apply_creds unsafe reasons */ +/* bprm->unsafe reasons */ #define LSM_UNSAFE_SHARE 1 #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 @@ -154,36 +153,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * * Security hooks for program execution operations. * - * @bprm_alloc_security: - * Allocate and attach a security structure to the @bprm->security field. - * The security field is initialized to NULL when the bprm structure is - * allocated. - * @bprm contains the linux_binprm structure to be modified. - * Return 0 if operation was successful. - * @bprm_free_security: - * @bprm contains the linux_binprm structure to be modified. - * Deallocate and clear the @bprm->security field. - * @bprm_apply_creds: - * Compute and set the security attributes of a process being transformed - * by an execve operation based on the old attributes (current->security) - * and the information saved in @bprm->security by the set_security hook. - * Since this function may return an error, in which case the process will - * be killed. However, it can leave the security attributes of the - * process unchanged if an access failure occurs at this point. - * bprm_apply_creds is called under task_lock. @unsafe indicates various - * reasons why it may be unsafe to change security state. - * @bprm contains the linux_binprm structure. - * @bprm_post_apply_creds: - * Runs after bprm_apply_creds with the task_lock dropped, so that - * functions which cannot be called safely under the task_lock can - * be used. This hook is a good place to perform state changes on - * the process such as closing open file descriptors to which access - * is no longer granted if the attributes were changed. - * Note that a security module might need to save state between - * bprm_apply_creds and bprm_post_apply_creds to store the decision - * on whether the process may proceed. - * @bprm contains the linux_binprm structure. - * @bprm_set_security: + * @bprm_set_creds: * Save security information in the bprm->security field, typically based * on information about the bprm->file, for later use by the apply_creds * hook. This hook may also optionally check permissions (e.g. for @@ -196,15 +166,30 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. * @bprm_check_security: - * This hook mediates the point when a search for a binary handler will - * begin. It allows a check the @bprm->security value which is set in - * the preceding set_security call. The primary difference from - * set_security is that the argv list and envp list are reliably - * available in @bprm. This hook may be called multiple times - * during a single execve; and in each pass set_security is called - * first. + * This hook mediates the point when a search for a binary handler will + * begin. It allows a check the @bprm->security value which is set in the + * preceding set_creds call. The primary difference from set_creds is + * that the argv list and envp list are reliably available in @bprm. This + * hook may be called multiple times during a single execve; and in each + * pass set_creds is called first. * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. + * @bprm_committing_creds: + * Prepare to install the new security attributes of a process being + * transformed by an execve operation, based on the old credentials + * pointed to by @current->cred and the information set in @bprm->cred by + * the bprm_set_creds hook. @bprm points to the linux_binprm structure. + * This hook is a good place to perform state changes on the process such + * as closing open file descriptors to which access will no longer be + * granted when the attributes are changed. This is called immediately + * before commit_creds(). + * @bprm_committed_creds: + * Tidy up after the installation of the new security attributes of a + * process being transformed by an execve operation. The new credentials + * have, by this point, been set to @current->cred. @bprm points to the + * linux_binprm structure. This hook is a good place to perform state + * changes on the process such as clearing out non-inheritable signal + * state. This is called immediately after commit_creds(). * @bprm_secureexec: * Return a boolean value (0 or 1) indicating whether a "secure exec" * is required. The flag is passed in the auxiliary table @@ -1301,13 +1286,11 @@ struct security_operations { int (*settime) (struct timespec *ts, struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); - int (*bprm_alloc_security) (struct linux_binprm *bprm); - void (*bprm_free_security) (struct linux_binprm *bprm); - int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); - void (*bprm_post_apply_creds) (struct linux_binprm *bprm); - int (*bprm_set_security) (struct linux_binprm *bprm); + int (*bprm_set_creds) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); int (*bprm_secureexec) (struct linux_binprm *bprm); + void (*bprm_committing_creds) (struct linux_binprm *bprm); + void (*bprm_committed_creds) (struct linux_binprm *bprm); int (*sb_alloc_security) (struct super_block *sb); void (*sb_free_security) (struct super_block *sb); @@ -1569,12 +1552,10 @@ int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); -int security_bprm_alloc(struct linux_binprm *bprm); -void security_bprm_free(struct linux_binprm *bprm); -int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); -void security_bprm_post_apply_creds(struct linux_binprm *bprm); -int security_bprm_set(struct linux_binprm *bprm); +int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); +void security_bprm_committing_creds(struct linux_binprm *bprm); +void security_bprm_committed_creds(struct linux_binprm *bprm); int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); @@ -1812,32 +1793,22 @@ static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) return cap_vm_enough_memory(mm, pages); } -static inline int security_bprm_alloc(struct linux_binprm *bprm) -{ - return 0; -} - -static inline void security_bprm_free(struct linux_binprm *bprm) -{ } - -static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_set_creds(struct linux_binprm *bprm) { - return cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_set_creds(bprm); } -static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) +static inline int security_bprm_check(struct linux_binprm *bprm) { - return; + return 0; } -static inline int security_bprm_set(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(struct linux_binprm *bprm) { - return cap_bprm_set_security(bprm); } -static inline int security_bprm_check(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(struct linux_binprm *bprm) { - return 0; } static inline int security_bprm_secureexec(struct linux_binprm *bprm) diff --git a/kernel/cred.c b/kernel/cred.c index cb6b5eda978d..e6fcdd67b2ec 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -68,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu) /* * Release a set of thread group credentials. */ -void release_tgcred(struct cred *cred) +static void release_tgcred(struct cred *cred) { #ifdef CONFIG_KEYS struct thread_group_cred *tgcred = cred->tgcred; @@ -163,6 +163,50 @@ error: } EXPORT_SYMBOL(prepare_creds); +/* + * Prepare credentials for current to perform an execve() + * - The caller must hold current->cred_exec_mutex + */ +struct cred *prepare_exec_creds(void) +{ + struct thread_group_cred *tgcred = NULL; + struct cred *new; + +#ifdef CONFIG_KEYS + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) + return NULL; +#endif + + new = prepare_creds(); + if (!new) { + kfree(tgcred); + return new; + } + +#ifdef CONFIG_KEYS + /* newly exec'd tasks don't get a thread keyring */ + key_put(new->thread_keyring); + new->thread_keyring = NULL; + + /* create a new per-thread-group creds for all this set of threads to + * share */ + memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); + + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + + /* inherit the session keyring; new process keyring */ + key_get(tgcred->session_keyring); + tgcred->process_keyring = NULL; + + release_tgcred(new); + new->tgcred = tgcred; +#endif + + return new; +} + /* * prepare new credentials for the usermode helper dispatcher */ diff --git a/security/capability.c b/security/capability.c index efeb6d9e0e6a..185804f99ad1 100644 --- a/security/capability.c +++ b/security/capability.c @@ -32,24 +32,19 @@ static int cap_quota_on(struct dentry *dentry) return 0; } -static int cap_bprm_alloc_security(struct linux_binprm *bprm) +static int cap_bprm_check_security (struct linux_binprm *bprm) { return 0; } -static void cap_bprm_free_security(struct linux_binprm *bprm) +static void cap_bprm_committing_creds(struct linux_binprm *bprm) { } -static void cap_bprm_post_apply_creds(struct linux_binprm *bprm) +static void cap_bprm_committed_creds(struct linux_binprm *bprm) { } -static int cap_bprm_check_security(struct linux_binprm *bprm) -{ - return 0; -} - static int cap_sb_alloc_security(struct super_block *sb) { return 0; @@ -827,11 +822,9 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, syslog); set_to_cap_if_null(ops, settime); set_to_cap_if_null(ops, vm_enough_memory); - set_to_cap_if_null(ops, bprm_alloc_security); - set_to_cap_if_null(ops, bprm_free_security); - set_to_cap_if_null(ops, bprm_apply_creds); - set_to_cap_if_null(ops, bprm_post_apply_creds); - set_to_cap_if_null(ops, bprm_set_security); + set_to_cap_if_null(ops, bprm_set_creds); + set_to_cap_if_null(ops, bprm_committing_creds); + set_to_cap_if_null(ops, bprm_committed_creds); set_to_cap_if_null(ops, bprm_check_security); set_to_cap_if_null(ops, bprm_secureexec); set_to_cap_if_null(ops, sb_alloc_security); diff --git a/security/commoncap.c b/security/commoncap.c index b5419273f92d..51dfa11e8e56 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -167,7 +167,7 @@ int cap_capset(struct cred *new, static inline void bprm_clear_caps(struct linux_binprm *bprm) { - cap_clear(bprm->cap_post_exec_permitted); + cap_clear(bprm->cred->cap_permitted); bprm->cap_effective = false; } @@ -198,15 +198,15 @@ int cap_inode_killpriv(struct dentry *dentry) } static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, - struct linux_binprm *bprm) + struct linux_binprm *bprm, + bool *effective) { + struct cred *new = bprm->cred; unsigned i; int ret = 0; if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) - bprm->cap_effective = true; - else - bprm->cap_effective = false; + *effective = true; CAP_FOR_EACH_U32(i) { __u32 permitted = caps->permitted.cap[i]; @@ -215,16 +215,13 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, /* * pP' = (X & fP) | (pI & fI) */ - bprm->cap_post_exec_permitted.cap[i] = - (current->cred->cap_bset.cap[i] & permitted) | - (current->cred->cap_inheritable.cap[i] & inheritable); + new->cap_permitted.cap[i] = + (new->cap_bset.cap[i] & permitted) | + (new->cap_inheritable.cap[i] & inheritable); - if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { - /* - * insufficient to execute correctly - */ + if (permitted & ~new->cap_permitted.cap[i]) + /* insufficient to execute correctly */ ret = -EPERM; - } } /* @@ -232,7 +229,7 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * do not have enough capabilities, we return an error if they are * missing some "forced" (aka file-permitted) capabilities. */ - return bprm->cap_effective ? ret : 0; + return *effective ? ret : 0; } int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) @@ -250,10 +247,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); - if (size == -ENODATA || size == -EOPNOTSUPP) { + if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; - } if (size < 0) return size; @@ -262,7 +258,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc); - switch ((magic_etc & VFS_CAP_REVISION_MASK)) { + switch (magic_etc & VFS_CAP_REVISION_MASK) { case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; @@ -283,11 +279,12 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted); cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); } + return 0; } /* Locate any VFS capabilities: */ -static int get_file_caps(struct linux_binprm *bprm) +static int get_file_caps(struct linux_binprm *bprm, bool *effective) { struct dentry *dentry; int rc = 0; @@ -313,7 +310,10 @@ static int get_file_caps(struct linux_binprm *bprm) goto out; } - rc = bprm_caps_from_vfs_caps(&vcaps, bprm); + rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective); + if (rc == -EINVAL) + printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", + __func__, rc, bprm->filename); out: dput(dentry); @@ -334,18 +334,27 @@ int cap_inode_killpriv(struct dentry *dentry) return 0; } -static inline int get_file_caps(struct linux_binprm *bprm) +static inline int get_file_caps(struct linux_binprm *bprm, bool *effective) { bprm_clear_caps(bprm); return 0; } #endif -int cap_bprm_set_security (struct linux_binprm *bprm) +/* + * set up the new credentials for an exec'd task + */ +int cap_bprm_set_creds(struct linux_binprm *bprm) { + const struct cred *old = current_cred(); + struct cred *new = bprm->cred; + bool effective; int ret; - ret = get_file_caps(bprm); + effective = false; + ret = get_file_caps(bprm, &effective); + if (ret < 0) + return ret; if (!issecure(SECURE_NOROOT)) { /* @@ -353,63 +362,47 @@ int cap_bprm_set_security (struct linux_binprm *bprm) * executables under compatibility mode, we override the * capability sets for the file. * - * If only the real uid is 0, we do not set the effective - * bit. + * If only the real uid is 0, we do not set the effective bit. */ - if (bprm->e_uid == 0 || current_uid() == 0) { + if (new->euid == 0 || new->uid == 0) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ - bprm->cap_post_exec_permitted = cap_combine( - current->cred->cap_bset, - current->cred->cap_inheritable); - bprm->cap_effective = (bprm->e_uid == 0); - ret = 0; + new->cap_permitted = cap_combine(old->cap_bset, + old->cap_inheritable); } + if (new->euid == 0) + effective = true; } - return ret; -} - -int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) -{ - const struct cred *old = current_cred(); - struct cred *new; - - new = prepare_creds(); - if (!new) - return -ENOMEM; - - if (bprm->e_uid != old->uid || bprm->e_gid != old->gid || - !cap_issubset(bprm->cap_post_exec_permitted, - old->cap_permitted)) { - set_dumpable(current->mm, suid_dumpable); - current->pdeath_signal = 0; - - if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { - if (!capable(CAP_SETUID)) { - bprm->e_uid = old->uid; - bprm->e_gid = old->gid; - } - if (cap_limit_ptraced_target()) { - bprm->cap_post_exec_permitted = cap_intersect( - bprm->cap_post_exec_permitted, - new->cap_permitted); - } + /* Don't let someone trace a set[ug]id/setpcap binary with the revised + * credentials unless they have the appropriate permit + */ + if ((new->euid != old->uid || + new->egid != old->gid || + !cap_issubset(new->cap_permitted, old->cap_permitted)) && + bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) { + /* downgrade; they get no more than they had, and maybe less */ + if (!capable(CAP_SETUID)) { + new->euid = new->uid; + new->egid = new->gid; } + if (cap_limit_ptraced_target()) + new->cap_permitted = cap_intersect(new->cap_permitted, + old->cap_permitted); } - new->suid = new->euid = new->fsuid = bprm->e_uid; - new->sgid = new->egid = new->fsgid = bprm->e_gid; + new->suid = new->fsuid = new->euid; + new->sgid = new->fsgid = new->egid; - /* For init, we want to retain the capabilities set - * in the init_task struct. Thus we skip the usual - * capability rules */ + /* For init, we want to retain the capabilities set in the initial + * task. Thus we skip the usual capability rules + */ if (!is_global_init(current)) { - new->cap_permitted = bprm->cap_post_exec_permitted; - if (bprm->cap_effective) - new->cap_effective = bprm->cap_post_exec_permitted; + if (effective) + new->cap_effective = new->cap_permitted; else cap_clear(new->cap_effective); } + bprm->cap_effective = effective; /* * Audit candidate if current->cap_effective is set @@ -425,23 +418,31 @@ int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) */ if (!cap_isclear(new->cap_effective)) { if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || - bprm->e_uid != 0 || new->uid != 0 || - issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, new, old); + new->euid != 0 || new->uid != 0 || + issecure(SECURE_NOROOT)) { + ret = audit_log_bprm_fcaps(bprm, new, old); + if (ret < 0) + return ret; + } } new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); - return commit_creds(new); + return 0; } -int cap_bprm_secureexec (struct linux_binprm *bprm) +/* + * determine whether a secure execution is required + * - the creds have been committed at this point, and are no longer available + * through bprm + */ +int cap_bprm_secureexec(struct linux_binprm *bprm) { const struct cred *cred = current_cred(); if (cred->uid != 0) { if (bprm->cap_effective) return 1; - if (!cap_isclear(bprm->cap_post_exec_permitted)) + if (!cap_isclear(cred->cap_permitted)) return 1; } @@ -477,7 +478,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) } /* moved from kernel/sys.c. */ -/* +/* * cap_emulate_setxuid() fixes the effective / permitted capabilities of * a process after a call to setuid, setreuid, or setresuid. * @@ -491,10 +492,10 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective * capabilities are set to the permitted capabilities. * - * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should + * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should * never happen. * - * -astor + * -astor * * cevans - New behaviour, Oct '99 * A process may, via prctl(), elect to keep its capabilities when it @@ -751,4 +752,3 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } - diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index df329f684a65..2f5d89e92b85 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -274,48 +274,6 @@ static int install_session_keyring(struct key *keyring) return commit_creds(new); } -/*****************************************************************************/ -/* - * deal with execve() - */ -int exec_keys(struct task_struct *tsk) -{ - struct thread_group_cred *tgcred = NULL; - struct cred *new; - -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) - return -ENOMEM; -#endif - - new = prepare_creds(); - if (new < 0) - return -ENOMEM; - - /* newly exec'd tasks don't get a thread keyring */ - key_put(new->thread_keyring); - new->thread_keyring = NULL; - - /* create a new per-thread-group creds for all this set of threads to - * share */ - memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - - /* inherit the session keyring; new process keyring */ - key_get(tgcred->session_keyring); - tgcred->process_keyring = NULL; - - release_tgcred(new); - new->tgcred = tgcred; - - commit_creds(new); - return 0; - -} /* end exec_keys() */ - /*****************************************************************************/ /* * the filesystem user ID changed diff --git a/security/root_plug.c b/security/root_plug.c index c3f68b5b372d..40fb4f15e27b 100644 --- a/security/root_plug.c +++ b/security/root_plug.c @@ -55,9 +55,9 @@ static int rootplug_bprm_check_security (struct linux_binprm *bprm) struct usb_device *dev; root_dbg("file %s, e_uid = %d, e_gid = %d\n", - bprm->filename, bprm->e_uid, bprm->e_gid); + bprm->filename, bprm->cred->euid, bprm->cred->egid); - if (bprm->e_gid == 0) { + if (bprm->cred->egid == 0) { dev = usb_find_device(vendor_id, product_id); if (!dev) { root_dbg("e_gid = 0, and device not found, " @@ -75,15 +75,12 @@ static struct security_operations rootplug_security_ops = { .ptrace_may_access = cap_ptrace_may_access, .ptrace_traceme = cap_ptrace_traceme, .capget = cap_capget, - .capset_check = cap_capset_check, - .capset_set = cap_capset_set, + .capset = cap_capset, .capable = cap_capable, - .bprm_apply_creds = cap_bprm_apply_creds, - .bprm_set_security = cap_bprm_set_security, + .bprm_set_creds = cap_bprm_set_creds, - .task_post_setuid = cap_task_post_setuid, - .task_reparent_to_init = cap_task_reparent_to_init, + .task_fix_setuid = cap_task_fix_setuid, .task_prctl = cap_task_prctl, .bprm_check_security = rootplug_bprm_check_security, diff --git a/security/security.c b/security/security.c index a55d739c6864..dc5babb2d6d8 100644 --- a/security/security.c +++ b/security/security.c @@ -213,34 +213,24 @@ int security_vm_enough_memory_kern(long pages) return security_ops->vm_enough_memory(current->mm, pages); } -int security_bprm_alloc(struct linux_binprm *bprm) +int security_bprm_set_creds(struct linux_binprm *bprm) { - return security_ops->bprm_alloc_security(bprm); + return security_ops->bprm_set_creds(bprm); } -void security_bprm_free(struct linux_binprm *bprm) -{ - security_ops->bprm_free_security(bprm); -} - -int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) -{ - return security_ops->bprm_apply_creds(bprm, unsafe); -} - -void security_bprm_post_apply_creds(struct linux_binprm *bprm) +int security_bprm_check(struct linux_binprm *bprm) { - security_ops->bprm_post_apply_creds(bprm); + return security_ops->bprm_check_security(bprm); } -int security_bprm_set(struct linux_binprm *bprm) +void security_bprm_committing_creds(struct linux_binprm *bprm) { - return security_ops->bprm_set_security(bprm); + return security_ops->bprm_committing_creds(bprm); } -int security_bprm_check(struct linux_binprm *bprm) +void security_bprm_committed_creds(struct linux_binprm *bprm) { - return security_ops->bprm_check_security(bprm); + return security_ops->bprm_committed_creds(bprm); } int security_bprm_secureexec(struct linux_binprm *bprm) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c71bba78872f..21a592184633 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2029,59 +2029,45 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) /* binprm security operations */ -static int selinux_bprm_alloc_security(struct linux_binprm *bprm) +static int selinux_bprm_set_creds(struct linux_binprm *bprm) { - struct bprm_security_struct *bsec; - - bsec = kzalloc(sizeof(struct bprm_security_struct), GFP_KERNEL); - if (!bsec) - return -ENOMEM; - - bsec->sid = SECINITSID_UNLABELED; - bsec->set = 0; - - bprm->security = bsec; - return 0; -} - -static int selinux_bprm_set_security(struct linux_binprm *bprm) -{ - struct task_security_struct *tsec; - struct inode *inode = bprm->file->f_path.dentry->d_inode; + const struct task_security_struct *old_tsec; + struct task_security_struct *new_tsec; struct inode_security_struct *isec; - struct bprm_security_struct *bsec; - u32 newsid; struct avc_audit_data ad; + struct inode *inode = bprm->file->f_path.dentry->d_inode; int rc; - rc = secondary_ops->bprm_set_security(bprm); + rc = secondary_ops->bprm_set_creds(bprm); if (rc) return rc; - bsec = bprm->security; - - if (bsec->set) + /* SELinux context only depends on initial program or script and not + * the script interpreter */ + if (bprm->cred_prepared) return 0; - tsec = current_security(); + old_tsec = current_security(); + new_tsec = bprm->cred->security; isec = inode->i_security; /* Default to the current task SID. */ - bsec->sid = tsec->sid; + new_tsec->sid = old_tsec->sid; + new_tsec->osid = old_tsec->sid; /* Reset fs, key, and sock SIDs on execve. */ - tsec->create_sid = 0; - tsec->keycreate_sid = 0; - tsec->sockcreate_sid = 0; + new_tsec->create_sid = 0; + new_tsec->keycreate_sid = 0; + new_tsec->sockcreate_sid = 0; - if (tsec->exec_sid) { - newsid = tsec->exec_sid; + if (old_tsec->exec_sid) { + new_tsec->sid = old_tsec->exec_sid; /* Reset exec SID on execve. */ - tsec->exec_sid = 0; + new_tsec->exec_sid = 0; } else { /* Check for a default transition on this program. */ - rc = security_transition_sid(tsec->sid, isec->sid, - SECCLASS_PROCESS, &newsid); + rc = security_transition_sid(old_tsec->sid, isec->sid, + SECCLASS_PROCESS, &new_tsec->sid); if (rc) return rc; } @@ -2090,33 +2076,63 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm) ad.u.fs.path = bprm->file->f_path; if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) - newsid = tsec->sid; + new_tsec->sid = old_tsec->sid; - if (tsec->sid == newsid) { - rc = avc_has_perm(tsec->sid, isec->sid, + if (new_tsec->sid == old_tsec->sid) { + rc = avc_has_perm(old_tsec->sid, isec->sid, SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); if (rc) return rc; } else { /* Check permissions for the transition. */ - rc = avc_has_perm(tsec->sid, newsid, + rc = avc_has_perm(old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); if (rc) return rc; - rc = avc_has_perm(newsid, isec->sid, + rc = avc_has_perm(new_tsec->sid, isec->sid, SECCLASS_FILE, FILE__ENTRYPOINT, &ad); if (rc) return rc; - /* Clear any possibly unsafe personality bits on exec: */ - current->personality &= ~PER_CLEAR_ON_SETID; + /* Check for shared state */ + if (bprm->unsafe & LSM_UNSAFE_SHARE) { + rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + SECCLASS_PROCESS, PROCESS__SHARE, + NULL); + if (rc) + return -EPERM; + } + + /* Make sure that anyone attempting to ptrace over a task that + * changes its SID has the appropriate permit */ + if (bprm->unsafe & + (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { + struct task_struct *tracer; + struct task_security_struct *sec; + u32 ptsid = 0; + + rcu_read_lock(); + tracer = tracehook_tracer_task(current); + if (likely(tracer != NULL)) { + sec = __task_cred(tracer)->security; + ptsid = sec->sid; + } + rcu_read_unlock(); + + if (ptsid != 0) { + rc = avc_has_perm(ptsid, new_tsec->sid, + SECCLASS_PROCESS, + PROCESS__PTRACE, NULL); + if (rc) + return -EPERM; + } + } - /* Set the security field to the new SID. */ - bsec->sid = newsid; + /* Clear any possibly unsafe personality bits on exec: */ + bprm->per_clear |= PER_CLEAR_ON_SETID; } - bsec->set = 1; return 0; } @@ -2125,7 +2141,6 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm) return secondary_ops->bprm_check_security(bprm); } - static int selinux_bprm_secureexec(struct linux_binprm *bprm) { const struct cred *cred = current_cred(); @@ -2141,19 +2156,13 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm) the noatsecure permission is granted between the two SIDs, i.e. ahp returns 0. */ atsecure = avc_has_perm(osid, sid, - SECCLASS_PROCESS, - PROCESS__NOATSECURE, NULL); + SECCLASS_PROCESS, + PROCESS__NOATSECURE, NULL); } return (atsecure || secondary_ops->bprm_secureexec(bprm)); } -static void selinux_bprm_free_security(struct linux_binprm *bprm) -{ - kfree(bprm->security); - bprm->security = NULL; -} - extern struct vfsmount *selinuxfs_mount; extern struct dentry *selinux_null; @@ -2252,108 +2261,78 @@ static inline void flush_unauthorized_files(const struct cred *cred, spin_unlock(&files->file_lock); } -static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +/* + * Prepare a process for imminent new credential changes due to exec + */ +static void selinux_bprm_committing_creds(struct linux_binprm *bprm) { - struct task_security_struct *tsec; - struct bprm_security_struct *bsec; - struct cred *new; - u32 sid; - int rc; - - rc = secondary_ops->bprm_apply_creds(bprm, unsafe); - if (rc < 0) - return rc; - - new = prepare_creds(); - if (!new) - return -ENOMEM; + struct task_security_struct *new_tsec; + struct rlimit *rlim, *initrlim; + int rc, i; - tsec = new->security; + secondary_ops->bprm_committing_creds(bprm); - bsec = bprm->security; - sid = bsec->sid; - - tsec->osid = tsec->sid; - bsec->unsafe = 0; - if (tsec->sid != sid) { - /* Check for shared state. If not ok, leave SID - unchanged and kill. */ - if (unsafe & LSM_UNSAFE_SHARE) { - rc = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, - PROCESS__SHARE, NULL); - if (rc) { - bsec->unsafe = 1; - goto out; - } - } + new_tsec = bprm->cred->security; + if (new_tsec->sid == new_tsec->osid) + return; - /* Check for ptracing, and update the task SID if ok. - Otherwise, leave SID unchanged and kill. */ - if (unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { - struct task_struct *tracer; - struct task_security_struct *sec; - u32 ptsid = 0; + /* Close files for which the new task SID is not authorized. */ + flush_unauthorized_files(bprm->cred, current->files); - rcu_read_lock(); - tracer = tracehook_tracer_task(current); - if (likely(tracer != NULL)) { - sec = __task_cred(tracer)->security; - ptsid = sec->sid; - } - rcu_read_unlock(); + /* Always clear parent death signal on SID transitions. */ + current->pdeath_signal = 0; - if (ptsid != 0) { - rc = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, - PROCESS__PTRACE, NULL); - if (rc) { - bsec->unsafe = 1; - goto out; - } - } + /* Check whether the new SID can inherit resource limits from the old + * SID. If not, reset all soft limits to the lower of the current + * task's hard limit and the init task's soft limit. + * + * Note that the setting of hard limits (even to lower them) can be + * controlled by the setrlimit check. The inclusion of the init task's + * soft limit into the computation is to avoid resetting soft limits + * higher than the default soft limit for cases where the default is + * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. + */ + rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, + PROCESS__RLIMITINH, NULL); + if (rc) { + for (i = 0; i < RLIM_NLIMITS; i++) { + rlim = current->signal->rlim + i; + initrlim = init_task.signal->rlim + i; + rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - tsec->sid = sid; + update_rlimit_cpu(rlim->rlim_cur); } - -out: - commit_creds(new); - return 0; } /* - * called after apply_creds without the task lock held + * Clean up the process immediately after the installation of new credentials + * due to exec */ -static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) +static void selinux_bprm_committed_creds(struct linux_binprm *bprm) { - const struct cred *cred = current_cred(); - struct task_security_struct *tsec; - struct rlimit *rlim, *initrlim; + const struct task_security_struct *tsec = current_security(); struct itimerval itimer; - struct bprm_security_struct *bsec; struct sighand_struct *psig; + u32 osid, sid; int rc, i; unsigned long flags; - tsec = current_security(); - bsec = bprm->security; + secondary_ops->bprm_committed_creds(bprm); - if (bsec->unsafe) { - force_sig_specific(SIGKILL, current); - return; - } - if (tsec->osid == tsec->sid) + osid = tsec->osid; + sid = tsec->sid; + + if (sid == osid) return; - /* Close files for which the new task SID is not authorized. */ - flush_unauthorized_files(cred, current->files); - - /* Check whether the new SID can inherit signal state - from the old SID. If not, clear itimers to avoid - subsequent signal generation and flush and unblock - signals. This must occur _after_ the task SID has - been updated so that any kill done after the flush - will be checked against the new SID. */ - rc = avc_has_perm(tsec->osid, tsec->sid, SECCLASS_PROCESS, - PROCESS__SIGINH, NULL); + /* Check whether the new SID can inherit signal state from the old SID. + * If not, clear itimers to avoid subsequent signal generation and + * flush and unblock signals. + * + * This must occur _after_ the task SID has been updated so that any + * kill done after the flush will be checked against the new SID. + */ + rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); if (rc) { memset(&itimer, 0, sizeof itimer); for (i = 0; i < 3; i++) @@ -2366,32 +2345,8 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) spin_unlock_irq(¤t->sighand->siglock); } - /* Always clear parent death signal on SID transitions. */ - current->pdeath_signal = 0; - - /* Check whether the new SID can inherit resource limits - from the old SID. If not, reset all soft limits to - the lower of the current task's hard limit and the init - task's soft limit. Note that the setting of hard limits - (even to lower them) can be controlled by the setrlimit - check. The inclusion of the init task's soft limit into - the computation is to avoid resetting soft limits higher - than the default soft limit for cases where the default - is lower than the hard limit, e.g. RLIMIT_CORE or - RLIMIT_STACK.*/ - rc = avc_has_perm(tsec->osid, tsec->sid, SECCLASS_PROCESS, - PROCESS__RLIMITINH, NULL); - if (rc) { - for (i = 0; i < RLIM_NLIMITS; i++) { - rlim = current->signal->rlim + i; - initrlim = init_task.signal->rlim+i; - rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); - } - update_rlimit_cpu(rlim->rlim_cur); - } - - /* Wake up the parent if it is waiting so that it can - recheck wait permission to the new task SID. */ + /* Wake up the parent if it is waiting so that it can recheck + * wait permission to the new task SID. */ read_lock_irq(&tasklist_lock); psig = current->parent->sighand; spin_lock_irqsave(&psig->siglock, flags); @@ -5556,12 +5511,10 @@ static struct security_operations selinux_ops = { .netlink_send = selinux_netlink_send, .netlink_recv = selinux_netlink_recv, - .bprm_alloc_security = selinux_bprm_alloc_security, - .bprm_free_security = selinux_bprm_free_security, - .bprm_apply_creds = selinux_bprm_apply_creds, - .bprm_post_apply_creds = selinux_bprm_post_apply_creds, - .bprm_set_security = selinux_bprm_set_security, + .bprm_set_creds = selinux_bprm_set_creds, .bprm_check_security = selinux_bprm_check_security, + .bprm_committing_creds = selinux_bprm_committing_creds, + .bprm_committed_creds = selinux_bprm_committed_creds, .bprm_secureexec = selinux_bprm_secureexec, .sb_alloc_security = selinux_sb_alloc_security, diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index f8be8d7fa26d..3cc45168f674 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -77,17 +77,6 @@ struct ipc_security_struct { u32 sid; /* SID of IPC resource */ }; -struct bprm_security_struct { - u32 sid; /* SID for transformed process */ - unsigned char set; - - /* - * unsafe is used to share failure information from bprm_apply_creds() - * to bprm_post_apply_creds(). - */ - char unsafe; -}; - struct netif_security_struct { int ifindex; /* device index */ u32 sid; /* SID for this interface */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e952b397153d..de396742abf4 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2596,8 +2596,7 @@ struct security_operations smack_ops = { .settime = cap_settime, .vm_enough_memory = cap_vm_enough_memory, - .bprm_apply_creds = cap_bprm_apply_creds, - .bprm_set_security = cap_bprm_set_security, + .bprm_set_creds = cap_bprm_set_creds, .bprm_secureexec = cap_bprm_secureexec, .sb_alloc_security = smack_sb_alloc_security, -- cgit v1.2.3 From 98870ab0a5a3f1822aee681d2997017e1c87d026 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Documentation Document credentials and the new credentials API. Signed-off-by: David Howells Signed-off-by: James Morris --- Documentation/credentials.txt | 582 ++++++++++++++++++++++++++++++++++++++++++ include/linux/cred.h | 12 +- kernel/cred.c | 2 +- 3 files changed, 594 insertions(+), 2 deletions(-) create mode 100644 Documentation/credentials.txt (limited to 'kernel') diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt new file mode 100644 index 000000000000..df03169782ea --- /dev/null +++ b/Documentation/credentials.txt @@ -0,0 +1,582 @@ + ==================== + CREDENTIALS IN LINUX + ==================== + +By: David Howells + +Contents: + + (*) Overview. + + (*) Types of credentials. + + (*) File markings. + + (*) Task credentials. + + - Immutable credentials. + - Accessing task credentials. + - Accessing another task's credentials. + - Altering credentials. + - Managing credentials. + + (*) Open file credentials. + + (*) Overriding the VFS's use of credentials. + + +======== +OVERVIEW +======== + +There are several parts to the security check performed by Linux when one +object acts upon another: + + (1) Objects. + + Objects are things in the system that may be acted upon directly by + userspace programs. Linux has a variety of actionable objects, including: + + - Tasks + - Files/inodes + - Sockets + - Message queues + - Shared memory segments + - Semaphores + - Keys + + As a part of the description of all these objects there is a set of + credentials. What's in the set depends on the type of object. + + (2) Object ownership. + + Amongst the credentials of most objects, there will be a subset that + indicates the ownership of that object. This is used for resource + accounting and limitation (disk quotas and task rlimits for example). + + In a standard UNIX filesystem, for instance, this will be defined by the + UID marked on the inode. + + (3) The objective context. + + Also amongst the credentials of those objects, there will be a subset that + indicates the 'objective context' of that object. This may or may not be + the same set as in (2) - in standard UNIX files, for instance, this is the + defined by the UID and the GID marked on the inode. + + The objective context is used as part of the security calculation that is + carried out when an object is acted upon. + + (4) Subjects. + + A subject is an object that is acting upon another object. + + Most of the objects in the system are inactive: they don't act on other + objects within the system. Processes/tasks are the obvious exception: + they do stuff; they access and manipulate things. + + Objects other than tasks may under some circumstances also be subjects. + For instance an open file may send SIGIO to a task using the UID and EUID + given to it by a task that called fcntl(F_SETOWN) upon it. In this case, + the file struct will have a subjective context too. + + (5) The subjective context. + + A subject has an additional interpretation of its credentials. A subset + of its credentials forms the 'subjective context'. The subjective context + is used as part of the security calculation that is carried out when a + subject acts. + + A Linux task, for example, has the FSUID, FSGID and the supplementary + group list for when it is acting upon a file - which are quite separate + from the real UID and GID that normally form the objective context of the + task. + + (6) Actions. + + Linux has a number of actions available that a subject may perform upon an + object. The set of actions available depends on the nature of the subject + and the object. + + Actions include reading, writing, creating and deleting files; forking or + signalling and tracing tasks. + + (7) Rules, access control lists and security calculations. + + When a subject acts upon an object, a security calculation is made. This + involves taking the subjective context, the objective context and the + action, and searching one or more sets of rules to see whether the subject + is granted or denied permission to act in the desired manner on the + object, given those contexts. + + There are two main sources of rules: + + (a) Discretionary access control (DAC): + + Sometimes the object will include sets of rules as part of its + description. This is an 'Access Control List' or 'ACL'. A Linux + file may supply more than one ACL. + + A traditional UNIX file, for example, includes a permissions mask that + is an abbreviated ACL with three fixed classes of subject ('user', + 'group' and 'other'), each of which may be granted certain privileges + ('read', 'write' and 'execute' - whatever those map to for the object + in question). UNIX file permissions do not allow the arbitrary + specification of subjects, however, and so are of limited use. + + A Linux file might also sport a POSIX ACL. This is a list of rules + that grants various permissions to arbitrary subjects. + + (b) Mandatory access control (MAC): + + The system as a whole may have one or more sets of rules that get + applied to all subjects and objects, regardless of their source. + SELinux and Smack are examples of this. + + In the case of SELinux and Smack, each object is given a label as part + of its credentials. When an action is requested, they take the + subject label, the object label and the action and look for a rule + that says that this action is either granted or denied. + + +==================== +TYPES OF CREDENTIALS +==================== + +The Linux kernel supports the following types of credentials: + + (1) Traditional UNIX credentials. + + Real User ID + Real Group ID + + The UID and GID are carried by most, if not all, Linux objects, even if in + some cases it has to be invented (FAT or CIFS files for example, which are + derived from Windows). These (mostly) define the objective context of + that object, with tasks being slightly different in some cases. + + Effective, Saved and FS User ID + Effective, Saved and FS Group ID + Supplementary groups + + These are additional credentials used by tasks only. Usually, an + EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID + will be used as the objective. For tasks, it should be noted that this is + not always true. + + (2) Capabilities. + + Set of permitted capabilities + Set of inheritable capabilities + Set of effective capabilities + Capability bounding set + + These are only carried by tasks. They indicate superior capabilities + granted piecemeal to a task that an ordinary task wouldn't otherwise have. + These are manipulated implicitly by changes to the traditional UNIX + credentials, but can also be manipulated directly by the capset() system + call. + + The permitted capabilities are those caps that the process might grant + itself to its effective or permitted sets through capset(). This + inheritable set might also be so constrained. + + The effective capabilities are the ones that a task is actually allowed to + make use of itself. + + The inheritable capabilities are the ones that may get passed across + execve(). + + The bounding set limits the capabilities that may be inherited across + execve(), especially when a binary is executed that will execute as UID 0. + + (3) Secure management flags (securebits). + + These are only carried by tasks. These govern the way the above + credentials are manipulated and inherited over certain operations such as + execve(). They aren't used directly as objective or subjective + credentials. + + (4) Keys and keyrings. + + These are only carried by tasks. They carry and cache security tokens + that don't fit into the other standard UNIX credentials. They are for + making such things as network filesystem keys available to the file + accesses performed by processes, without the necessity of ordinary + programs having to know about security details involved. + + Keyrings are a special type of key. They carry sets of other keys and can + be searched for the desired key. Each process may subscribe to a number + of keyrings: + + Per-thread keying + Per-process keyring + Per-session keyring + + When a process accesses a key, if not already present, it will normally be + cached on one of these keyrings for future accesses to find. + + For more information on using keys, see Documentation/keys.txt. + + (5) LSM + + The Linux Security Module allows extra controls to be placed over the + operations that a task may do. Currently Linux supports two main + alternate LSM options: SELinux and Smack. + + Both work by labelling the objects in a system and then applying sets of + rules (policies) that say what operations a task with one label may do to + an object with another label. + + (6) AF_KEY + + This is a socket-based approach to credential management for networking + stacks [RFC 2367]. It isn't discussed by this document as it doesn't + interact directly with task and file credentials; rather it keeps system + level credentials. + + +When a file is opened, part of the opening task's subjective context is +recorded in the file struct created. This allows operations using that file +struct to use those credentials instead of the subjective context of the task +that issued the operation. An example of this would be a file opened on a +network filesystem where the credentials of the opened file should be presented +to the server, regardless of who is actually doing a read or a write upon it. + + +============= +FILE MARKINGS +============= + +Files on disk or obtained over the network may have annotations that form the +objective security context of that file. Depending on the type of filesystem, +this may include one or more of the following: + + (*) UNIX UID, GID, mode; + + (*) Windows user ID; + + (*) Access control list; + + (*) LSM security label; + + (*) UNIX exec privilege escalation bits (SUID/SGID); + + (*) File capabilities exec privilege escalation bits. + +These are compared to the task's subjective security context, and certain +operations allowed or disallowed as a result. In the case of execve(), the +privilege escalation bits come into play, and may allow the resulting process +extra privileges, based on the annotations on the executable file. + + +================ +TASK CREDENTIALS +================ + +In Linux, all of a task's credentials are held in (uid, gid) or through +(groups, keys, LSM security) a refcounted structure of type 'struct cred'. +Each task points to its credentials by a pointer called 'cred' in its +task_struct. + +Once a set of credentials has been prepared and committed, it may not be +changed, barring the following exceptions: + + (1) its reference count may be changed; + + (2) the reference count on the group_info struct it points to may be changed; + + (3) the reference count on the security data it points to may be changed; + + (4) the reference count on any keyrings it points to may be changed; + + (5) any keyrings it points to may be revoked, expired or have their security + attributes changed; and + + (6) the contents of any keyrings to which it points may be changed (the whole + point of keyrings being a shared set of credentials, modifiable by anyone + with appropriate access). + +To alter anything in the cred struct, the copy-and-replace principle must be +adhered to. First take a copy, then alter the copy and then use RCU to change +the task pointer to make it point to the new copy. There are wrappers to aid +with this (see below). + +A task may only alter its _own_ credentials; it is no longer permitted for a +task to alter another's credentials. This means the capset() system call is no +longer permitted to take any PID other than the one of the current process. +Also keyctl_instantiate() and keyctl_negate() functions no longer permit +attachment to process-specific keyrings in the requesting process as the +instantiating process may need to create them. + + +IMMUTABLE CREDENTIALS +--------------------- + +Once a set of credentials has been made public (by calling commit_creds() for +example), it must be considered immutable, barring two exceptions: + + (1) The reference count may be altered. + + (2) Whilst the keyring subscriptions of a set of credentials may not be + changed, the keyrings subscribed to may have their contents altered. + +To catch accidental credential alteration at compile time, struct task_struct +has _const_ pointers to its credential sets, as does struct file. Furthermore, +certain functions such as get_cred() and put_cred() operate on const pointers, +thus rendering casts unnecessary, but require to temporarily ditch the const +qualification to be able to alter the reference count. + + +ACCESSING TASK CREDENTIALS +-------------------------- + +A task being able to alter only its own credentials permits the current process +to read or replace its own credentials without the need for any form of locking +- which simplifies things greatly. It can just call: + + const struct cred *current_cred() + +to get a pointer to its credentials structure, and it doesn't have to release +it afterwards. + +There are convenience wrappers for retrieving specific aspects of a task's +credentials (the value is simply returned in each case): + + uid_t current_uid(void) Current's real UID + gid_t current_gid(void) Current's real GID + uid_t current_euid(void) Current's effective UID + gid_t current_egid(void) Current's effective GID + uid_t current_fsuid(void) Current's file access UID + gid_t current_fsgid(void) Current's file access GID + kernel_cap_t current_cap(void) Current's effective capabilities + void *current_security(void) Current's LSM security pointer + struct user_struct *current_user(void) Current's user account + +There are also convenience wrappers for retrieving specific associated pairs of +a task's credentials: + + void current_uid_gid(uid_t *, gid_t *); + void current_euid_egid(uid_t *, gid_t *); + void current_fsuid_fsgid(uid_t *, gid_t *); + +which return these pairs of values through their arguments after retrieving +them from the current task's credentials. + + +In addition, there is a function for obtaining a reference on the current +process's current set of credentials: + + const struct cred *get_current_cred(void); + +and functions for getting references to one of the credentials that don't +actually live in struct cred: + + struct user_struct *get_current_user(void); + struct group_info *get_current_groups(void); + +which get references to the current process's user accounting structure and +supplementary groups list respectively. + +Once a reference has been obtained, it must be released with put_cred(), +free_uid() or put_group_info() as appropriate. + + +ACCESSING ANOTHER TASK'S CREDENTIALS +------------------------------------ + +Whilst a task may access its own credentials without the need for locking, the +same is not true of a task wanting to access another task's credentials. It +must use the RCU read lock and rcu_dereference(). + +The rcu_dereference() is wrapped by: + + const struct cred *__task_cred(struct task_struct *task); + +This should be used inside the RCU read lock, as in the following example: + + void foo(struct task_struct *t, struct foo_data *f) + { + const struct cred *tcred; + ... + rcu_read_lock(); + tcred = __task_cred(t); + f->uid = tcred->uid; + f->gid = tcred->gid; + f->groups = get_group_info(tcred->groups); + rcu_read_unlock(); + ... + } + +A function need not get RCU read lock to use __task_cred() if it is holding a +spinlock at the time as this implicitly holds the RCU read lock. + +Should it be necessary to hold another task's credentials for a long period of +time, and possibly to sleep whilst doing so, then the caller should get a +reference on them using: + + const struct cred *get_task_cred(struct task_struct *task); + +This does all the RCU magic inside of it. The caller must call put_cred() on +the credentials so obtained when they're finished with. + +There are a couple of convenience functions to access bits of another task's +credentials, hiding the RCU magic from the caller: + + uid_t task_uid(task) Task's real UID + uid_t task_euid(task) Task's effective UID + +If the caller is holding a spinlock or the RCU read lock at the time anyway, +then: + + __task_cred(task)->uid + __task_cred(task)->euid + +should be used instead. Similarly, if multiple aspects of a task's credentials +need to be accessed, RCU read lock or a spinlock should be used, __task_cred() +called, the result stored in a temporary pointer and then the credential +aspects called from that before dropping the lock. This prevents the +potentially expensive RCU magic from being invoked multiple times. + +Should some other single aspect of another task's credentials need to be +accessed, then this can be used: + + task_cred_xxx(task, member) + +where 'member' is a non-pointer member of the cred struct. For instance: + + uid_t task_cred_xxx(task, suid); + +will retrieve 'struct cred::suid' from the task, doing the appropriate RCU +magic. This may not be used for pointer members as what they point to may +disappear the moment the RCU read lock is dropped. + + +ALTERING CREDENTIALS +-------------------- + +As previously mentioned, a task may only alter its own credentials, and may not +alter those of another task. This means that it doesn't need to use any +locking to alter its own credentials. + +To alter the current process's credentials, a function should first prepare a +new set of credentials by calling: + + struct cred *prepare_creds(void); + +this locks current->cred_replace_mutex and then allocates and constructs a +duplicate of the current process's credentials, returning with the mutex still +held if successful. It returns NULL if not successful (out of memory). + +The mutex prevents ptrace() from altering the ptrace state of a process whilst +security checks on credentials construction and changing is taking place as +the ptrace state may alter the outcome, particularly in the case of execve(). + +The new credentials set should be altered appropriately, and any security +checks and hooks done. Both the current and the proposed sets of credentials +are available for this purpose as current_cred() will return the current set +still at this point. + + +When the credential set is ready, it should be committed to the current process +by calling: + + int commit_creds(struct cred *new); + +This will alter various aspects of the credentials and the process, giving the +LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually +commit the new credentials to current->cred, it will release +current->cred_replace_mutex to allow ptrace() to take place, and it will notify +the scheduler and others of the changes. + +This function is guaranteed to return 0, so that it can be tail-called at the +end of such functions as sys_setresuid(). + +Note that this function consumes the caller's reference to the new credentials. +The caller should _not_ call put_cred() on the new credentials afterwards. + +Furthermore, once this function has been called on a new set of credentials, +those credentials may _not_ be changed further. + + +Should the security checks fail or some other error occur after prepare_creds() +has been called, then the following function should be invoked: + + void abort_creds(struct cred *new); + +This releases the lock on current->cred_replace_mutex that prepare_creds() got +and then releases the new credentials. + + +A typical credentials alteration function would look something like this: + + int alter_suid(uid_t suid) + { + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->suid = suid; + ret = security_alter_suid(new); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); + } + + +MANAGING CREDENTIALS +-------------------- + +There are some functions to help manage credentials: + + (*) void put_cred(const struct cred *cred); + + This releases a reference to the given set of credentials. If the + reference count reaches zero, the credentials will be scheduled for + destruction by the RCU system. + + (*) const struct cred *get_cred(const struct cred *cred); + + This gets a reference on a live set of credentials, returning a pointer to + that set of credentials. + + (*) struct cred *get_new_cred(struct cred *cred); + + This gets a reference on a set of credentials that is under construction + and is thus still mutable, returning a pointer to that set of credentials. + + +===================== +OPEN FILE CREDENTIALS +===================== + +When a new file is opened, a reference is obtained on the opening task's +credentials and this is attached to the file struct as 'f_cred' in place of +'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid +should now access file->f_cred->fsuid and file->f_cred->fsgid. + +It is safe to access f_cred without the use of RCU or locking because the +pointer will not change over the lifetime of the file struct, and nor will the +contents of the cred struct pointed to, barring the exceptions listed above +(see the Task Credentials section). + + +======================================= +OVERRIDING THE VFS'S USE OF CREDENTIALS +======================================= + +Under some circumstances it is desirable to override the credentials used by +the VFS, and that can be done by calling into such as vfs_mkdir() with a +different set of credentials. This is done in the following places: + + (*) sys_faccessat(). + + (*) do_coredump(). + + (*) nfs4recover.c. diff --git a/include/linux/cred.h b/include/linux/cred.h index 8edb4d1d5427..794aab5c66e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -1,4 +1,4 @@ -/* Credentials management +/* Credentials management - see Documentation/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -169,6 +169,12 @@ static inline struct cred *get_new_cred(struct cred *cred) * * Get a reference on the specified set of credentials. The caller must * release the reference. + * + * This is used to deal with a committed set of credentials. Although the + * pointer is const, this will temporarily discard the const and increment the + * usage count. The purpose of this is to attempt to catch at compile time the + * accidental alteration of a set of credentials that should be considered + * immutable. */ static inline const struct cred *get_cred(const struct cred *cred) { @@ -181,6 +187,10 @@ static inline const struct cred *get_cred(const struct cred *cred) * * Release a reference to a set of credentials, deleting them when the last ref * is released. + * + * This takes a const pointer to a set of credentials because the credentials + * on task_struct are attached by const pointers to prevent accidental + * alteration of otherwise immutable credential sets. */ static inline void put_cred(const struct cred *_cred) { diff --git a/kernel/cred.c b/kernel/cred.c index e6fcdd67b2ec..b8bd2f99d8ce 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -1,4 +1,4 @@ -/* Task credentials management +/* Task credentials management - see Documentation/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) -- cgit v1.2.3 From 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Differentiate objective and effective subjective credentials on a task Differentiate the objective and real subjective credentials from the effective subjective credentials on a task by introducing a second credentials pointer into the task_struct. task_struct::real_cred then refers to the objective and apparent real subjective credentials of a task, as perceived by the other tasks in the system. task_struct::cred then refers to the effective subjective credentials of a task, as used by that task when it's actually running. These are not visible to the other tasks in the system. __task_cred(task) then refers to the objective/real credentials of the task in question. current_cred() refers to the effective subjective credentials of the current task. prepare_creds() uses the objective creds as a base and commit_creds() changes both pointers in the task_struct (indeed commit_creds() requires them to be the same). override_creds() and revert_creds() change the subjective creds pointer only, and the former returns the old subjective creds. These are used by NFSD, faccessat() and do_coredump(), and will by used by CacheFiles. In SELinux, current_has_perm() is provided as an alternative to task_has_perm(). This uses the effective subjective context of current, whereas task_has_perm() uses the objective/real context of the subject. Signed-off-by: David Howells Signed-off-by: James Morris --- fs/nfsd/auth.c | 5 +++- include/linux/cred.h | 29 +++++++++++---------- include/linux/init_task.h | 1 + include/linux/sched.h | 5 +++- kernel/cred.c | 38 ++++++++++++++++++--------- kernel/fork.c | 6 +++-- security/selinux/hooks.c | 65 ++++++++++++++++++++++++++++++----------------- 7 files changed, 95 insertions(+), 54 deletions(-) (limited to 'kernel') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 836ffa1047d9..0184fe9b514c 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int flags = nfsexp_flags(rqstp, exp); int ret; + /* discard any old override before preparing the new set */ + revert_creds(get_cred(current->real_cred)); new = prepare_creds(); if (!new) return -ENOMEM; @@ -82,7 +84,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - return commit_creds(new); + put_cred(override_creds(new)); + return 0; oom: ret = -ENOMEM; diff --git a/include/linux/cred.h b/include/linux/cred.h index 794aab5c66e5..55a9c995d694 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,8 +146,8 @@ extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *) __deprecated; -extern void revert_creds(const struct cred *) __deprecated; +extern const struct cred *override_creds(const struct cred *); +extern void revert_creds(const struct cred *); extern void __init cred_init(void); /** @@ -202,32 +202,32 @@ static inline void put_cred(const struct cred *_cred) } /** - * current_cred - Access the current task's credentials + * current_cred - Access the current task's subjective credentials * - * Access the credentials of the current task. + * Access the subjective credentials of the current task. */ #define current_cred() \ (current->cred) /** - * __task_cred - Access another task's credentials + * __task_cred - Access a task's objective credentials * @task: The task to query * - * Access the credentials of another task. The caller must hold the - * RCU readlock. + * Access the objective credentials of a task. The caller must hold the RCU + * readlock. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. */ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference((task)->cred))) + ((const struct cred *)(rcu_dereference((task)->real_cred))) /** - * get_task_cred - Get another task's credentials + * get_task_cred - Get another task's objective credentials * @task: The task to query * - * Get the credentials of a task, pinning them so that they can't go away. - * Accessing a task's credentials directly is not permitted. + * Get the objective credentials of a task, pinning them so that they can't go + * away. Accessing a task's credentials directly is not permitted. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. @@ -243,10 +243,11 @@ static inline void put_cred(const struct cred *_cred) }) /** - * get_current_cred - Get the current task's credentials + * get_current_cred - Get the current task's subjective credentials * - * Get the credentials of the current task, pinning them so that they can't go - * away. Accessing the current task's credentials directly is not permitted. + * Get the subjective credentials of the current task, pinning them so that + * they can't go away. Accessing the current task's credentials directly is + * not permitted. */ #define get_current_cred() \ (get_cred(current_cred())) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08c3b24ad9a8..2597858035cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ + .real_cred = &init_cred, \ .cred = &init_cred, \ .cred_exec_mutex = \ __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 121d655e460d..3443123b0709 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,10 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *cred; /* actual/objective task credentials (COW) */ + const struct cred *real_cred; /* objective and real subjective task + * credentials (COW) */ + const struct cred *cred; /* effective (overridable) subjective task + * credentials (COW) */ struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/kernel/cred.c b/kernel/cred.c index b8bd2f99d8ce..f3ca10660617 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -35,7 +35,7 @@ static struct thread_group_cred init_tgcred = { * The initial credentials for the initial task */ struct cred init_cred = { - .usage = ATOMIC_INIT(3), + .usage = ATOMIC_INIT(4), .securebits = SECUREBITS_DEFAULT, .cap_inheritable = CAP_INIT_INH_SET, .cap_permitted = CAP_FULL_SET, @@ -120,6 +120,8 @@ EXPORT_SYMBOL(__put_cred); * prepare a new copy, which the caller then modifies and then commits by * calling commit_creds(). * + * Preparation involves making a copy of the objective creds for modification. + * * Returns a pointer to the new creds-to-be if successful, NULL otherwise. * * Call commit_creds() or abort_creds() to clean up. @@ -130,7 +132,7 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - BUG_ON(atomic_read(&task->cred->usage) < 1); + BUG_ON(atomic_read(&task->real_cred->usage) < 1); new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) @@ -262,6 +264,9 @@ error: * * We share if we can, but under some circumstances we have to generate a new * set. + * + * The new process gets the current process's subjective credentials as its + * objective and subjective credentials */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { @@ -278,6 +283,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif clone_flags & CLONE_THREAD ) { + p->real_cred = get_cred(p->cred); get_cred(p->cred); atomic_inc(&p->cred->user->processes); return 0; @@ -317,7 +323,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif atomic_inc(&new->user->processes); - p->cred = new; + p->cred = p->real_cred = get_cred(new); return 0; } @@ -326,7 +332,9 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) * @new: The credentials to be assigned * * Install a new set of credentials to the current task, using RCU to replace - * the old set. + * the old set. Both the objective and the subjective credentials pointers are + * updated. This function may not be called if the subjective credentials are + * in an overridden state. * * This function eats the caller's reference to the new credentials. * @@ -338,12 +346,15 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old; + BUG_ON(task->cred != task->real_cred); + BUG_ON(atomic_read(&task->real_cred->usage) < 2); BUG_ON(atomic_read(&new->usage) < 1); - BUG_ON(atomic_read(&task->cred->usage) < 1); - old = task->cred; + old = task->real_cred; security_commit_creds(new, old); + get_cred(new); /* we will require a ref for the subj creds too */ + /* dumpability changes */ if (old->euid != new->euid || old->egid != new->egid || @@ -369,6 +380,7 @@ int commit_creds(struct cred *new) */ if (new->user != old->user) atomic_inc(&new->user->processes); + rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) atomic_dec(&old->user->processes); @@ -388,6 +400,8 @@ int commit_creds(struct cred *new) new->fsgid != old->fsgid) proc_id_connector(task, PROC_EVENT_GID); + /* release the old obj and subj refs both */ + put_cred(old); put_cred(old); return 0; } @@ -408,11 +422,11 @@ void abort_creds(struct cred *new) EXPORT_SYMBOL(abort_creds); /** - * override_creds - Temporarily override the current process's credentials + * override_creds - Override the current process's subjective credentials * @new: The credentials to be assigned * - * Install a set of temporary override credentials on the current process, - * returning the old set for later reversion. + * Install a set of temporary override subjective credentials on the current + * process, returning the old set for later reversion. */ const struct cred *override_creds(const struct cred *new) { @@ -424,11 +438,11 @@ const struct cred *override_creds(const struct cred *new) EXPORT_SYMBOL(override_creds); /** - * revert_creds - Revert a temporary credentials override + * revert_creds - Revert a temporary subjective credentials override * @old: The credentials to be restored * - * Revert a temporary set of override credentials to an old set, discarding the - * override set. + * Revert a temporary set of override subjective credentials to an old set, + * discarding the override set. */ void revert_creds(const struct cred *old) { diff --git a/kernel/fork.c b/kernel/fork.c index 82a7948a664e..af0d0f04585c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -146,6 +146,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + put_cred(tsk->real_cred); put_cred(tsk->cred); delayacct_tsk_free(tsk); @@ -961,10 +962,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; - if (atomic_read(&p->cred->user->processes) >= + if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->cred->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } @@ -1278,6 +1279,7 @@ bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); + put_cred(p->real_cred); put_cred(p->cred); bad_fork_free: free_task(p); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 21a592184633..91b06f2aa963 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -161,7 +161,7 @@ static int selinux_secmark_enabled(void) */ static void cred_init_security(void) { - struct cred *cred = (struct cred *) current->cred; + struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); @@ -184,7 +184,7 @@ static inline u32 cred_sid(const struct cred *cred) } /* - * get the security ID of a task + * get the objective security ID of a task */ static inline u32 task_sid(const struct task_struct *task) { @@ -197,7 +197,7 @@ static inline u32 task_sid(const struct task_struct *task) } /* - * get the security ID of the current task + * get the subjective security ID of the current task */ static inline u32 current_sid(void) { @@ -1395,6 +1395,7 @@ static int cred_has_perm(const struct cred *actor, * Check permission between a pair of tasks, e.g. signal checks, * fork check, ptrace check, etc. * tsk1 is the actor and tsk2 is the target + * - this uses the default subjective creds of tsk1 */ static int task_has_perm(const struct task_struct *tsk1, const struct task_struct *tsk2, @@ -1410,6 +1411,22 @@ static int task_has_perm(const struct task_struct *tsk1, return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL); } +/* + * Check permission between current and another task, e.g. signal checks, + * fork check, ptrace check, etc. + * current is the actor and tsk2 is the target + * - this uses current's subjective creds + */ +static int current_has_perm(const struct task_struct *tsk, + u32 perms) +{ + u32 sid, tsid; + + sid = current_sid(); + tsid = task_sid(tsk); + return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL); +} + #if CAP_LAST_CAP > 63 #error Fix SELinux to handle capabilities > 63. #endif @@ -1807,7 +1824,7 @@ static int selinux_ptrace_may_access(struct task_struct *child, return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); } - return task_has_perm(current, child, PROCESS__PTRACE); + return current_has_perm(child, PROCESS__PTRACE); } static int selinux_ptrace_traceme(struct task_struct *parent) @@ -1826,7 +1843,7 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, { int error; - error = task_has_perm(current, target, PROCESS__GETCAP); + error = current_has_perm(target, PROCESS__GETCAP); if (error) return error; @@ -3071,7 +3088,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, } else if (!vma->vm_file && vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { - rc = task_has_perm(current, current, PROCESS__EXECSTACK); + rc = current_has_perm(current, PROCESS__EXECSTACK); } else if (vma->vm_file && vma->anon_vma) { /* * We are making executable a file mapping that has @@ -3220,7 +3237,7 @@ static int selinux_task_create(unsigned long clone_flags) if (rc) return rc; - return task_has_perm(current, current, PROCESS__FORK); + return current_has_perm(current, PROCESS__FORK); } /* @@ -3285,17 +3302,17 @@ static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) { - return task_has_perm(current, p, PROCESS__SETPGID); + return current_has_perm(p, PROCESS__SETPGID); } static int selinux_task_getpgid(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETPGID); + return current_has_perm(p, PROCESS__GETPGID); } static int selinux_task_getsid(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSESSION); + return current_has_perm(p, PROCESS__GETSESSION); } static void selinux_task_getsecid(struct task_struct *p, u32 *secid) @@ -3317,7 +3334,7 @@ static int selinux_task_setnice(struct task_struct *p, int nice) if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_setioprio(struct task_struct *p, int ioprio) @@ -3328,12 +3345,12 @@ static int selinux_task_setioprio(struct task_struct *p, int ioprio) if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_getioprio(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSCHED); + return current_has_perm(p, PROCESS__GETSCHED); } static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) @@ -3350,7 +3367,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return task_has_perm(current, current, PROCESS__SETRLIMIT); + return current_has_perm(current, PROCESS__SETRLIMIT); return 0; } @@ -3363,17 +3380,17 @@ static int selinux_task_setscheduler(struct task_struct *p, int policy, struct s if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_getscheduler(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSCHED); + return current_has_perm(p, PROCESS__GETSCHED); } static int selinux_task_movememory(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_kill(struct task_struct *p, struct siginfo *info, @@ -3394,7 +3411,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, rc = avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); else - rc = task_has_perm(current, p, perm); + rc = current_has_perm(p, perm); return rc; } @@ -5250,7 +5267,7 @@ static int selinux_getprocattr(struct task_struct *p, unsigned len; if (current != p) { - error = task_has_perm(current, p, PROCESS__GETATTR); + error = current_has_perm(p, PROCESS__GETATTR); if (error) return error; } @@ -5309,15 +5326,15 @@ static int selinux_setprocattr(struct task_struct *p, * above restriction is ever removed. */ if (!strcmp(name, "exec")) - error = task_has_perm(current, p, PROCESS__SETEXEC); + error = current_has_perm(p, PROCESS__SETEXEC); else if (!strcmp(name, "fscreate")) - error = task_has_perm(current, p, PROCESS__SETFSCREATE); + error = current_has_perm(p, PROCESS__SETFSCREATE); else if (!strcmp(name, "keycreate")) - error = task_has_perm(current, p, PROCESS__SETKEYCREATE); + error = current_has_perm(p, PROCESS__SETKEYCREATE); else if (!strcmp(name, "sockcreate")) - error = task_has_perm(current, p, PROCESS__SETSOCKCREATE); + error = current_has_perm(p, PROCESS__SETSOCKCREATE); else if (!strcmp(name, "current")) - error = task_has_perm(current, p, PROCESS__SETCURRENT); + error = current_has_perm(p, PROCESS__SETCURRENT); else error = -EINVAL; if (error) -- cgit v1.2.3 From 3a3b7ce9336952ea7b9564d976d068a238976c9d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:28 +1100 Subject: CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler [Smack changes] Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 6 +++ include/linux/security.h | 28 +++++++++++ kernel/cred.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ security/capability.c | 12 +++++ security/security.c | 10 ++++ security/selinux/hooks.c | 46 ++++++++++++++++++ security/smack/smack_lsm.c | 37 +++++++++++++++ 7 files changed, 252 insertions(+) (limited to 'kernel') diff --git a/include/linux/cred.h b/include/linux/cred.h index 55a9c995d694..26c1ab179946 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ struct user_struct; struct cred; +struct inode; /* * COW Supplementary groups list @@ -148,6 +149,11 @@ extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); +extern struct cred *prepare_kernel_cred(struct task_struct *); +extern int change_create_files_as(struct cred *, struct inode *); +extern int set_security_override(struct cred *, u32); +extern int set_security_override_from_ctx(struct cred *, const char *); +extern int set_create_files_as(struct cred *, struct inode *); extern void __init cred_init(void); /** diff --git a/include/linux/security.h b/include/linux/security.h index 56a0eed65673..59a11e19b617 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -587,6 +587,19 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new points to the new credentials. * @old points to the original credentials. * Install a new set of credentials. + * @kernel_act_as: + * Set the credentials for a kernel service to act as (subjective context). + * @new points to the credentials to be modified. + * @secid specifies the security ID to be set + * The current task must be the one that nominated @secid. + * Return 0 if successful. + * @kernel_create_files_as: + * Set the file creation context in a set of credentials to be the same as + * the objective context of the specified inode. + * @new points to the credentials to be modified. + * @inode points to the inode to use as a reference. + * The current task must be the one that nominated @inode. + * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1381,6 +1394,8 @@ struct security_operations { int (*cred_prepare)(struct cred *new, const struct cred *old, gfp_t gfp); void (*cred_commit)(struct cred *new, const struct cred *old); + int (*kernel_act_as)(struct cred *new, u32 secid); + int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1632,6 +1647,8 @@ int security_task_create(unsigned long clone_flags); void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_commit_creds(struct cred *new, const struct cred *old); +int security_kernel_act_as(struct cred *new, u32 secid); +int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2151,6 +2168,17 @@ static inline void security_commit_creds(struct cred *new, { } +static inline int security_kernel_act_as(struct cred *cred, u32 secid) +{ + return 0; +} + +static inline int security_kernel_create_files_as(struct cred *cred, + struct inode *inode) +{ + return 0; +} + static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { diff --git a/kernel/cred.c b/kernel/cred.c index f3ca10660617..13697ca2bb38 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -462,3 +462,116 @@ void __init cred_init(void) cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } + +/** + * prepare_kernel_cred - Prepare a set of credentials for a kernel service + * @daemon: A userspace daemon to be used as a reference + * + * Prepare a set of credentials for a kernel service. This can then be used to + * override a task's own credentials so that work can be done on behalf of that + * task that requires a different subjective context. + * + * @daemon is used to provide a base for the security record, but can be NULL. + * If @daemon is supplied, then the security data will be derived from that; + * otherwise they'll be set to 0 and no groups, full capabilities and no keys. + * + * The caller may change these controls afterwards if desired. + * + * Returns the new credentials or NULL if out of memory. + * + * Does not take, and does not return holding current->cred_replace_mutex. + */ +struct cred *prepare_kernel_cred(struct task_struct *daemon) +{ + const struct cred *old; + struct cred *new; + + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + if (daemon) + old = get_task_cred(daemon); + else + old = get_cred(&init_cred); + + get_uid(new->user); + get_group_info(new->group_info); + +#ifdef CONFIG_KEYS + atomic_inc(&init_tgcred.usage); + new->tgcred = &init_tgcred; + new->request_key_auth = NULL; + new->thread_keyring = NULL; + new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + goto error; + + atomic_set(&new->usage, 1); + put_cred(old); + return new; + +error: + put_cred(new); + return NULL; +} +EXPORT_SYMBOL(prepare_kernel_cred); + +/** + * set_security_override - Set the security ID in a set of credentials + * @new: The credentials to alter + * @secid: The LSM security ID to set + * + * Set the LSM security ID in a set of credentials so that the subjective + * security is overridden when an alternative set of credentials is used. + */ +int set_security_override(struct cred *new, u32 secid) +{ + return security_kernel_act_as(new, secid); +} +EXPORT_SYMBOL(set_security_override); + +/** + * set_security_override_from_ctx - Set the security ID in a set of credentials + * @new: The credentials to alter + * @secctx: The LSM security context to generate the security ID from. + * + * Set the LSM security ID in a set of credentials so that the subjective + * security is overridden when an alternative set of credentials is used. The + * security ID is specified in string form as a security context to be + * interpreted by the LSM. + */ +int set_security_override_from_ctx(struct cred *new, const char *secctx) +{ + u32 secid; + int ret; + + ret = security_secctx_to_secid(secctx, strlen(secctx), &secid); + if (ret < 0) + return ret; + + return set_security_override(new, secid); +} +EXPORT_SYMBOL(set_security_override_from_ctx); + +/** + * set_create_files_as - Set the LSM file create context in a set of credentials + * @new: The credentials to alter + * @inode: The inode to take the context from + * + * Change the LSM file creation context in a set of credentials to be the same + * as the object context of the specified inode, so that the new inodes have + * the same MAC context as that inode. + */ +int set_create_files_as(struct cred *new, struct inode *inode) +{ + new->fsuid = inode->i_uid; + new->fsgid = inode->i_gid; + return security_kernel_create_files_as(new, inode); +} +EXPORT_SYMBOL(set_create_files_as); diff --git a/security/capability.c b/security/capability.c index 185804f99ad1..b9e391425e6f 100644 --- a/security/capability.c +++ b/security/capability.c @@ -348,6 +348,16 @@ static void cap_cred_commit(struct cred *new, const struct cred *old) { } +static int cap_kernel_act_as(struct cred *new, u32 secid) +{ + return 0; +} + +static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) +{ + return 0; +} + static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return 0; @@ -889,6 +899,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, cred_free); set_to_cap_if_null(ops, cred_prepare); set_to_cap_if_null(ops, cred_commit); + set_to_cap_if_null(ops, kernel_act_as); + set_to_cap_if_null(ops, kernel_create_files_as); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index dc5babb2d6d8..038ef04b2c7f 100644 --- a/security/security.c +++ b/security/security.c @@ -616,6 +616,16 @@ void security_commit_creds(struct cred *new, const struct cred *old) return security_ops->cred_commit(new, old); } +int security_kernel_act_as(struct cred *new, u32 secid) +{ + return security_ops->kernel_act_as(new, secid); +} + +int security_kernel_create_files_as(struct cred *new, struct inode *inode) +{ + return security_ops->kernel_create_files_as(new, inode); +} + int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return security_ops->task_setuid(id0, id1, id2, flags); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 91b06f2aa963..520f82ab3fbf 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3277,6 +3277,50 @@ static void selinux_cred_commit(struct cred *new, const struct cred *old) secondary_ops->cred_commit(new, old); } +/* + * set the security data for a kernel service + * - all the creation contexts are set to unlabelled + */ +static int selinux_kernel_act_as(struct cred *new, u32 secid) +{ + struct task_security_struct *tsec = new->security; + u32 sid = current_sid(); + int ret; + + ret = avc_has_perm(sid, secid, + SECCLASS_KERNEL_SERVICE, + KERNEL_SERVICE__USE_AS_OVERRIDE, + NULL); + if (ret == 0) { + tsec->sid = secid; + tsec->create_sid = 0; + tsec->keycreate_sid = 0; + tsec->sockcreate_sid = 0; + } + return ret; +} + +/* + * set the file creation context in a security record to the same as the + * objective context of the specified inode + */ +static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) +{ + struct inode_security_struct *isec = inode->i_security; + struct task_security_struct *tsec = new->security; + u32 sid = current_sid(); + int ret; + + ret = avc_has_perm(sid, isec->sid, + SECCLASS_KERNEL_SERVICE, + KERNEL_SERVICE__CREATE_FILES_AS, + NULL); + + if (ret == 0) + tsec->create_sid = isec->sid; + return 0; +} + static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { /* Since setuid only affects the current process, and @@ -5593,6 +5637,8 @@ static struct security_operations selinux_ops = { .cred_free = selinux_cred_free, .cred_prepare = selinux_cred_prepare, .cred_commit = selinux_cred_commit, + .kernel_act_as = selinux_kernel_act_as, + .kernel_create_files_as = selinux_kernel_create_files_as, .task_setuid = selinux_task_setuid, .task_fix_setuid = selinux_task_fix_setuid, .task_setgid = selinux_task_setgid, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index de396742abf4..8ad48161cef5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1011,6 +1011,41 @@ static void smack_cred_commit(struct cred *new, const struct cred *old) { } +/** + * smack_kernel_act_as - Set the subjective context in a set of credentials + * @new points to the set of credentials to be modified. + * @secid specifies the security ID to be set + * + * Set the security data for a kernel service. + */ +static int smack_kernel_act_as(struct cred *new, u32 secid) +{ + char *smack = smack_from_secid(secid); + + if (smack == NULL) + return -EINVAL; + + new->security = smack; + return 0; +} + +/** + * smack_kernel_create_files_as - Set the file creation label in a set of creds + * @new points to the set of credentials to be modified + * @inode points to the inode to use as a reference + * + * Set the file creation context in a set of credentials to the same + * as the objective context of the specified inode + */ +static int smack_kernel_create_files_as(struct cred *new, + struct inode *inode) +{ + struct inode_smack *isp = inode->i_security; + + new->security = isp->smk_inode; + return 0; +} + /** * smack_task_setpgid - Smack check on setting pgid * @p: the task object @@ -2641,6 +2676,8 @@ struct security_operations smack_ops = { .cred_free = smack_cred_free, .cred_prepare = smack_cred_prepare, .cred_commit = smack_cred_commit, + .kernel_act_as = smack_kernel_act_as, + .kernel_create_files_as = smack_kernel_create_files_as, .task_fix_setuid = cap_task_fix_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, -- cgit v1.2.3 From ec4e0e2fe018992d980910db901637c814575914 Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Tue, 18 Nov 2008 22:41:57 -0800 Subject: sched: fix inconsistency when redistribute per-cpu tg->cfs_rq shares Impact: make load-balancing more consistent In the update_shares() path leading to tg_shares_up(), the calculation of per-cpu cfs_rq shares is rather erratic even under moderate task wake up rate. The problem is that the per-cpu tg->cfs_rq load weight used in the sd_rq_weight aggregation and actual redistribution of the cfs_rq->shares are collected at different time. Under moderate system load, we've seen quite a bit of variation on the cfs_rq->shares and ultimately wildly affects sched_entity's load weight. This patch caches the result of initial per-cpu load weight when doing the sum calculation, and then pass it down to update_group_shares_cpu() for redistributing per-cpu cfs_rq shares. This allows consistent total cfs_rq shares across all CPUs. It also simplifies the rounding and zero load weight check. Signed-off-by: Ken Chen Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index a4c156d9a4a5..93bfb086e60f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1453,27 +1453,13 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, unsigned long sd_shares, unsigned long sd_rq_weight) { - int boost = 0; unsigned long shares; unsigned long rq_weight; if (!tg->se[cpu]) return; - rq_weight = tg->cfs_rq[cpu]->load.weight; - - /* - * If there are currently no tasks on the cpu pretend there is one of - * average load so that when a new task gets to run here it will not - * get delayed by group starvation. - */ - if (!rq_weight) { - boost = 1; - rq_weight = NICE_0_LOAD; - } - - if (unlikely(rq_weight > sd_rq_weight)) - rq_weight = sd_rq_weight; + rq_weight = tg->cfs_rq[cpu]->rq_weight; /* * \Sum shares * rq_weight @@ -1481,7 +1467,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, * \Sum rq_weight * */ - shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); + shares = (sd_shares * rq_weight) / sd_rq_weight; shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); if (abs(shares - tg->se[cpu]->load.weight) > @@ -1490,11 +1476,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, unsigned long flags; spin_lock_irqsave(&rq->lock, flags); - /* - * record the actual number of shares, not the boosted amount. - */ - tg->cfs_rq[cpu]->shares = boost ? 0 : shares; - tg->cfs_rq[cpu]->rq_weight = rq_weight; + tg->cfs_rq[cpu]->shares = shares; __set_se_shares(tg->se[cpu], shares); spin_unlock_irqrestore(&rq->lock, flags); @@ -1508,13 +1490,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu, */ static int tg_shares_up(struct task_group *tg, void *data) { - unsigned long rq_weight = 0; + unsigned long weight, rq_weight = 0; unsigned long shares = 0; struct sched_domain *sd = data; int i; for_each_cpu_mask(i, sd->span) { - rq_weight += tg->cfs_rq[i]->load.weight; + /* + * If there are currently no tasks on the cpu pretend there + * is one of average load so that when a new task gets to + * run here it will not get delayed by group starvation. + */ + weight = tg->cfs_rq[i]->load.weight; + if (!weight) + weight = NICE_0_LOAD; + + tg->cfs_rq[i]->rq_weight = weight; + rq_weight += weight; shares += tg->cfs_rq[i]->shares; } @@ -1524,9 +1516,6 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) shares = tg->shares; - if (!rq_weight) - rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; - for_each_cpu_mask(i, sd->span) update_group_shares_cpu(tg, i, shares, rq_weight); -- cgit v1.2.3 From 957ad0166e9f76a8561dafa5e14ef5bd3f5e9a3b Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 21 Nov 2008 01:30:36 +0100 Subject: sched: update comment for move_task_off_dead_cpu Impact: cleanup This commit: commit f7b4cddcc5aca03e80e357360c9424dfba5056c2 Author: Oleg Nesterov Date: Tue Oct 16 23:30:56 2007 -0700 do CPU_DEAD migrating under read_lock(tasklist) instead of write_lock_irq(ta Currently move_task_off_dead_cpu() is called under write_lock_irq(tasklist). This means it can't use task_lock() which is needed to improve migrating to take task's ->cpuset into account. Change the code to call move_task_off_dead_cpu() with irqs enabled, and change migrate_live_tasks() to use read_lock(tasklist). ...forgot to update the comment in front of move_task_off_dead_cpu. Reference: http://lkml.org/lkml/2008/6/23/135 Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 93bfb086e60f..a6085d5166dd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6094,7 +6094,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) /* * Figure out where task on dead CPU should go, use force if necessary. - * NOTE: interrupts should be disabled by the caller */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { -- cgit v1.2.3 From 18b6e0414e42d95183f07d8177e3ff0241abd825 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Wed, 15 Oct 2008 16:38:45 -0500 Subject: User namespaces: set of cleanups (v2) The user_ns is moved from nsproxy to user_struct, so that a struct cred by itself is sufficient to determine access (which it otherwise would not be). Corresponding ecryptfs fixes (by David Howells) are here as well. Fix refcounting. The following rules now apply: 1. The task pins the user struct. 2. The user struct pins its user namespace. 3. The user namespace pins the struct user which created it. User namespaces are cloned during copy_creds(). Unsharing a new user_ns is no longer possible. (We could re-add that, but it'll cause code duplication and doesn't seem useful if PAM doesn't need to clone user namespaces). When a user namespace is created, its first user (uid 0) gets empty keyrings and a clean group_info. This incorporates a previous patch by David Howells. Here is his original patch description: >I suggest adding the attached incremental patch. It makes the following >changes: > > (1) Provides a current_user_ns() macro to wrap accesses to current's user > namespace. > > (2) Fixes eCryptFS. > > (3) Renames create_new_userns() to create_user_ns() to be more consistent > with the other associated functions and because the 'new' in the name is > superfluous. > > (4) Moves the argument and permission checks made for CLONE_NEWUSER to the > beginning of do_fork() so that they're done prior to making any attempts > at allocation. > > (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds > to fill in rather than have it return the new root user. I don't imagine > the new root user being used for anything other than filling in a cred > struct. > > This also permits me to get rid of a get_uid() and a free_uid(), as the > reference the creds were holding on the old user_struct can just be > transferred to the new namespace's creator pointer. > > (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under > preparation rather than doing it in copy_creds(). > >David >Signed-off-by: David Howells Changelog: Oct 20: integrate dhowells comments 1. leave thread_keyring alone 2. use current_user_ns() in set_user() Signed-off-by: Serge Hallyn --- fs/ecryptfs/messaging.c | 13 ++++---- fs/ecryptfs/miscdev.c | 19 ++++------- include/linux/cred.h | 2 ++ include/linux/init_task.h | 1 - include/linux/nsproxy.h | 1 - include/linux/sched.h | 1 + include/linux/user_namespace.h | 13 +++----- kernel/cred.c | 15 +++++++-- kernel/fork.c | 19 +++++++++-- kernel/nsproxy.c | 15 ++------- kernel/sys.c | 4 +-- kernel/user.c | 47 ++++++++------------------ kernel/user_namespace.c | 75 +++++++++++++++++------------------------- 13 files changed, 96 insertions(+), 129 deletions(-) (limited to 'kernel') diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index e0b0a4e28b9b..6913f727624d 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -360,7 +360,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, struct ecryptfs_msg_ctx *msg_ctx; size_t msg_size; struct nsproxy *nsproxy; - struct user_namespace *current_user_ns; + struct user_namespace *tsk_user_ns; uid_t ctx_euid; int rc; @@ -385,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, mutex_unlock(&ecryptfs_daemon_hash_mux); goto wake_up; } - current_user_ns = nsproxy->user_ns; + tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns; ctx_euid = task_euid(msg_ctx->task); - rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, current_user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); rcu_read_unlock(); mutex_unlock(&ecryptfs_daemon_hash_mux); if (rc) { @@ -405,11 +405,11 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, euid, ctx_euid); goto unlock; } - if (current_user_ns != user_ns) { + if (tsk_user_ns != user_ns) { rc = -EBADMSG; printk(KERN_WARNING "%s: Received message from user_ns " "[0x%p]; expected message from user_ns [0x%p]\n", - __func__, user_ns, nsproxy->user_ns); + __func__, user_ns, tsk_user_ns); goto unlock; } if (daemon->pid != pid) { @@ -468,8 +468,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, uid_t euid = current_euid(); int rc; - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); if (rc || !daemon) { rc = -ENOTCONN; printk(KERN_ERR "%s: User [%d] does not have a daemon " diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 047ac609695b..efd95a0ed1ea 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -47,8 +47,7 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -95,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) "count; rc = [%d]\n", __func__, rc); goto out_unlock_daemon_list; } - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); if (rc || !daemon) { - rc = ecryptfs_spawn_daemon(&daemon, euid, - current->nsproxy->user_ns, + rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(), task_pid(current)); if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " @@ -153,8 +150,7 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); BUG_ON(daemon->pid != task_pid(current)); @@ -254,8 +250,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -295,7 +290,7 @@ check_list: goto check_list; } BUG_ON(euid != daemon->euid); - BUG_ON(current->nsproxy->user_ns != daemon->user_ns); + BUG_ON(current_user_ns() != daemon->user_ns); BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); @@ -468,7 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, goto out_free; } rc = ecryptfs_miscdev_response(&data[i], packet_size, - euid, current->nsproxy->user_ns, + euid, current_user_ns(), task_pid(current), seq); if (rc) printk(KERN_WARNING "%s: Failed to deliver miscdev " diff --git a/include/linux/cred.h b/include/linux/cred.h index 26c1ab179946..3282ee4318e7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -60,6 +60,7 @@ do { \ } while (0) extern struct group_info *groups_alloc(int); +extern struct group_info init_groups; extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); @@ -315,6 +316,7 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) +#define current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) #define current_uid_gid(_uid, _gid) \ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2597858035cd..959f5522d10a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy; .mnt_ns = NULL, \ INIT_NET_NS(net_ns) \ INIT_IPC_NS(ipc_ns) \ - .user_ns = &init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index c8a768e59640..afad7dec1b36 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -27,7 +27,6 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; - struct user_namespace *user_ns; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2036e9f26020..7f8015a3082e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -638,6 +638,7 @@ struct user_struct { /* Hash table maintenance information */ struct hlist_node uidhash_node; uid_t uid; + struct user_namespace *user_ns; #ifdef CONFIG_USER_SCHED struct task_group *tg; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b5f41d4c2eec..315bcd375224 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -12,7 +12,7 @@ struct user_namespace { struct kref kref; struct hlist_head uidhash_table[UIDHASH_SZ]; - struct user_struct *root_user; + struct user_struct *creator; }; extern struct user_namespace init_user_ns; @@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return ns; } -extern struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns); +extern int create_user_ns(struct cred *new); extern void free_user_ns(struct kref *kref); static inline void put_user_ns(struct user_namespace *ns) @@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return &init_user_ns; } -static inline struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns) +static inline int create_user_ns(struct cred *new) { - if (flags & CLONE_NEWUSER) - return ERR_PTR(-EINVAL); - - return old_ns; + return -EINVAL; } static inline void put_user_ns(struct user_namespace *ns) diff --git a/kernel/cred.c b/kernel/cred.c index 13697ca2bb38..ff7bc071991c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -274,6 +274,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct thread_group_cred *tgcred; #endif struct cred *new; + int ret; mutex_init(&p->cred_exec_mutex); @@ -293,6 +294,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!new) return -ENOMEM; + if (clone_flags & CLONE_NEWUSER) { + ret = create_user_ns(new); + if (ret < 0) + goto error_put; + } + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -309,8 +316,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!(clone_flags & CLONE_THREAD)) { tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); if (!tgcred) { - put_cred(new); - return -ENOMEM; + ret = -ENOMEM; + goto error_put; } atomic_set(&tgcred->usage, 1); spin_lock_init(&tgcred->lock); @@ -325,6 +332,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) atomic_inc(&new->user->processes); p->cred = p->real_cred = get_cred(new); return 0; + +error_put: + put_cred(new); + return ret; } /** diff --git a/kernel/fork.c b/kernel/fork.c index 29c18c14812d..1dd89451fae4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -976,7 +976,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->real_cred->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != INIT_USER) goto bad_fork_free; } @@ -1334,6 +1334,20 @@ long do_fork(unsigned long clone_flags, int trace = 0; long nr; + /* + * Do some preliminary argument and permissions checking before we + * actually start allocating stuff + */ + if (clone_flags & CLONE_NEWUSER) { + if (clone_flags & CLONE_THREAD) + return -EINVAL; + /* hopefully this check will go away when userns support is + * complete + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + /* * We hope to recycle these flags after 2.6.26 */ @@ -1581,8 +1595,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| - CLONE_NEWNET)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) goto bad_unshare_out; /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 1d3ef29a2583..63598dca2d0c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_pid; } - new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); - if (IS_ERR(new_nsp->user_ns)) { - err = PTR_ERR(new_nsp->user_ns); - goto out_user; - } - new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); if (IS_ERR(new_nsp->net_ns)) { err = PTR_ERR(new_nsp->net_ns); @@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->user_ns) - put_user_ns(new_nsp->user_ns); -out_user: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); out_pid: @@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) get_nsproxy(old_ns); if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET))) + CLONE_NEWPID | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) { @@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns) put_ipc_ns(ns->ipc_ns); if (ns->pid_ns) put_pid_ns(ns->pid_ns); - if (ns->user_ns) - put_user_ns(ns->user_ns); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } @@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWNET))) + CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/kernel/sys.c b/kernel/sys.c index ab735040468a..ebe65c2c9873 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -565,13 +565,13 @@ static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current->nsproxy->user_ns, new->uid); + new_user = alloc_uid(current_user_ns(), new->uid); if (!new_user) return -EAGAIN; if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && - new_user != current->nsproxy->user_ns->root_user) { + new_user != INIT_USER) { free_uid(new_user); return -EAGAIN; } diff --git a/kernel/user.c b/kernel/user.c index d476307dd4b0..c0ef3a464438 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -20,9 +20,9 @@ struct user_namespace init_user_ns = { .kref = { - .refcount = ATOMIC_INIT(2), + .refcount = ATOMIC_INIT(1), }, - .root_user = &root_user, + .creator = &root_user, }; EXPORT_SYMBOL_GPL(init_user_ns); @@ -48,12 +48,14 @@ static struct kmem_cache *uid_cachep; */ static DEFINE_SPINLOCK(uidhash_lock); +/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ struct user_struct root_user = { - .__count = ATOMIC_INIT(1), + .__count = ATOMIC_INIT(2), .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, + .user_ns = &init_user_ns, #ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif @@ -314,12 +316,13 @@ done: * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. */ -static inline void free_user(struct user_struct *up, unsigned long flags) +static void free_user(struct user_struct *up, unsigned long flags) { /* restore back the count */ atomic_inc(&up->__count); spin_unlock_irqrestore(&uidhash_lock, flags); + put_user_ns(up->user_ns); INIT_WORK(&up->work, remove_user_sysfs_dir); schedule_work(&up->work); } @@ -335,13 +338,14 @@ static inline void uids_mutex_unlock(void) { } * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. */ -static inline void free_user(struct user_struct *up, unsigned long flags) +static void free_user(struct user_struct *up, unsigned long flags) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); sched_destroy_user(up); key_put(up->uid_keyring); key_put(up->session_keyring); + put_user_ns(up->user_ns); kmem_cache_free(uid_cachep, up); } @@ -357,7 +361,7 @@ struct user_struct *find_user(uid_t uid) { struct user_struct *ret; unsigned long flags; - struct user_namespace *ns = current->nsproxy->user_ns; + struct user_namespace *ns = current_user()->user_ns; spin_lock_irqsave(&uidhash_lock, flags); ret = uid_hash_find(uid, uidhashentry(ns, uid)); @@ -404,6 +408,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) if (sched_create_user(new) < 0) goto out_free_user; + new->user_ns = get_user_ns(ns); + if (uids_user_create(new)) goto out_destoy_sched; @@ -427,7 +433,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) up = new; } spin_unlock_irq(&uidhash_lock); - } uids_mutex_unlock(); @@ -436,6 +441,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) out_destoy_sched: sched_destroy_user(new); + put_user_ns(new->user_ns); out_free_user: kmem_cache_free(uid_cachep, new); out_unlock: @@ -443,33 +449,6 @@ out_unlock: return NULL; } -#ifdef CONFIG_USER_NS -void release_uids(struct user_namespace *ns) -{ - int i; - unsigned long flags; - struct hlist_head *head; - struct hlist_node *nd; - - spin_lock_irqsave(&uidhash_lock, flags); - /* - * collapse the chains so that the user_struct-s will - * be still alive, but not in hashes. subsequent free_uid() - * will free them. - */ - for (i = 0; i < UIDHASH_SZ; i++) { - head = ns->uidhash_table + i; - while (!hlist_empty(head)) { - nd = head->first; - hlist_del_init(nd); - } - } - spin_unlock_irqrestore(&uidhash_lock, flags); - - free_uid(ns->root_user); -} -#endif - static int __init uid_cache_init(void) { int n; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d9c51d67333..79084311ee57 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,70 +9,55 @@ #include #include #include +#include /* - * Clone a new ns copying an original user ns, setting refcount to 1 - * @old_ns: namespace to clone - * Return NULL on error (failure to kmalloc), new ns otherwise + * Create a new user namespace, deriving the creator from the user in the + * passed credentials, and replacing that user with the new root user for the + * new namespace. + * + * This is called by copy_creds(), which will finish setting the target task's + * credentials. */ -static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) +int create_user_ns(struct cred *new) { struct user_namespace *ns; - struct user_struct *new_user; - struct cred *new; + struct user_struct *root_user; int n; ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); if (!ns) - return ERR_PTR(-ENOMEM); + return -ENOMEM; kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) INIT_HLIST_HEAD(ns->uidhash_table + n); - /* Insert new root user. */ - ns->root_user = alloc_uid(ns, 0); - if (!ns->root_user) { + /* Alloc new root user. */ + root_user = alloc_uid(ns, 0); + if (!root_user) { kfree(ns); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - /* Reset current->user with a new one */ - new_user = alloc_uid(ns, current_uid()); - if (!new_user) { - free_uid(ns->root_user); - kfree(ns); - return ERR_PTR(-ENOMEM); - } - - /* Install the new user */ - new = prepare_creds(); - if (!new) { - free_uid(new_user); - free_uid(ns->root_user); - kfree(ns); - } - free_uid(new->user); - new->user = new_user; - commit_creds(new); - return ns; -} - -struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) -{ - struct user_namespace *new_ns; - - BUG_ON(!old_ns); - get_user_ns(old_ns); - - if (!(flags & CLONE_NEWUSER)) - return old_ns; + /* set the new root user in the credentials under preparation */ + ns->creator = new->user; + new->user = root_user; + new->uid = new->euid = new->suid = new->fsuid = 0; + new->gid = new->egid = new->sgid = new->fsgid = 0; + put_group_info(new->group_info); + new->group_info = get_group_info(&init_groups); +#ifdef CONFIG_KEYS + key_put(new->request_key_auth); + new->request_key_auth = NULL; +#endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ - new_ns = clone_user_ns(old_ns); + /* alloc_uid() incremented the userns refcount. Just set it to 1 */ + kref_set(&ns->kref, 1); - put_user_ns(old_ns); - return new_ns; + return 0; } void free_user_ns(struct kref *kref) @@ -80,7 +65,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); - release_uids(ns); + free_uid(ns->creator); kfree(ns); } EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.3 From 6ded6ab9be4f6164aef1c527407c1b94f0929799 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Mon, 24 Nov 2008 16:24:10 -0500 Subject: User namespaces: use the current_user_ns() macro Fix up the last current_user()->user_ns instance to use current_user_ns(). Signed-off-by: Serge E. Hallyn --- kernel/user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/user.c b/kernel/user.c index c0ef3a464438..97202cb29adc 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -361,7 +361,7 @@ struct user_struct *find_user(uid_t uid) { struct user_struct *ret; unsigned long flags; - struct user_namespace *ns = current_user()->user_ns; + struct user_namespace *ns = current_user_ns(); spin_lock_irqsave(&uidhash_lock, flags); ret = uid_hash_find(uid, uidhashentry(ns, uid)); -- cgit v1.2.3 From 70574a996fc7a70c5586eb56bd92a544eccf18b6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 28 Nov 2008 22:08:00 +0300 Subject: sched: move double_unlock_balance() higher Move double_lock_balance()/double_unlock_balance() higher to fix the following with gcc-3.4.6: CC kernel/sched.o In file included from kernel/sched.c:1605: kernel/sched_rt.c: In function `find_lock_lowest_rq': kernel/sched_rt.c:914: sorry, unimplemented: inlining failed in call to 'double_unlock_balance': function body not available kernel/sched_rt.c:1077: sorry, unimplemented: called from here make[2]: *** [kernel/sched.o] Error 1 Signed-off-by: Alexey Dobriyan Signed-off-by: Ingo Molnar --- kernel/sched.c | 67 +++++++++++++++++++++++++++---------------------------- kernel/sched_rt.c | 4 ---- 2 files changed, 33 insertions(+), 38 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3d1ee429219b..6a99703e0eb0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1581,6 +1581,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) #endif +/* + * double_lock_balance - lock the busiest runqueue, this_rq is locked already. + */ +static int double_lock_balance(struct rq *this_rq, struct rq *busiest) + __releases(this_rq->lock) + __acquires(busiest->lock) + __acquires(this_rq->lock) +{ + int ret = 0; + + if (unlikely(!irqs_disabled())) { + /* printk() doesn't work good under rq->lock */ + spin_unlock(&this_rq->lock); + BUG_ON(1); + } + if (unlikely(!spin_trylock(&busiest->lock))) { + if (busiest < this_rq) { + spin_unlock(&this_rq->lock); + spin_lock(&busiest->lock); + spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); + ret = 1; + } else + spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); + } + return ret; +} + +static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) + __releases(busiest->lock) +{ + spin_unlock(&busiest->lock); + lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); +} #endif #ifdef CONFIG_FAIR_GROUP_SCHED @@ -2780,40 +2813,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) __release(rq2->lock); } -/* - * double_lock_balance - lock the busiest runqueue, this_rq is locked already. - */ -static int double_lock_balance(struct rq *this_rq, struct rq *busiest) - __releases(this_rq->lock) - __acquires(busiest->lock) - __acquires(this_rq->lock) -{ - int ret = 0; - - if (unlikely(!irqs_disabled())) { - /* printk() doesn't work good under rq->lock */ - spin_unlock(&this_rq->lock); - BUG_ON(1); - } - if (unlikely(!spin_trylock(&busiest->lock))) { - if (busiest < this_rq) { - spin_unlock(&this_rq->lock); - spin_lock(&busiest->lock); - spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); - ret = 1; - } else - spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); - } - return ret; -} - -static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) - __releases(busiest->lock) -{ - spin_unlock(&busiest->lock); - lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); -} - /* * If dest_cpu is allowed for this process, migrate the task to it. * This is accomplished by forcing the cpu_allowed mask to only diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 2bdd44423599..587a16e2a8f5 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -909,10 +909,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) /* Only try algorithms three times */ #define RT_MAX_TRIES 3 -static int double_lock_balance(struct rq *this_rq, struct rq *busiest); -static inline void double_unlock_balance(struct rq *this_rq, - struct rq *busiest); - static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) -- cgit v1.2.3 From 6c415b9234a8c71f290e5d4fddc467f103f32719 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Mon, 1 Dec 2008 20:49:05 +0530 Subject: sched: add uid information to sched_debug for CONFIG_USER_SCHED Impact: extend information in /proc/sched_debug This patch adds uid information in sched_debug for CONFIG_USER_SCHED Signed-off-by: Arun R Bharadwaj Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched.c | 10 ++++++++++ kernel/sched_debug.c | 6 +++++- kernel/user.c | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7a69c4d224ee..d8733f07d80b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2218,6 +2218,7 @@ extern void normalize_rt_tasks(void); extern struct task_group init_task_group; #ifdef CONFIG_USER_SCHED extern struct task_group root_task_group; +extern void set_tg_uid(struct user_struct *user); #endif extern struct task_group *sched_create_group(struct task_group *parent); diff --git a/kernel/sched.c b/kernel/sched.c index 6a99703e0eb0..4c7388ef5be7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -261,6 +261,10 @@ struct task_group { struct cgroup_subsys_state css; #endif +#ifdef CONFIG_USER_SCHED + uid_t uid; +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ struct sched_entity **se; @@ -286,6 +290,12 @@ struct task_group { #ifdef CONFIG_USER_SCHED +/* Helper function to pass uid information to create_sched_user() */ +void set_tg_uid(struct user_struct *user) +{ + user->tg->uid = user->uid; +} + /* * Root task group. * Every UID task group (including init_task_group aka UID-0) will diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index baf2f17af462..4293cfa9681d 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -160,10 +160,14 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cgroup_path(tg->css.cgroup, path, sizeof(path)); SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); +#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) + { + uid_t uid = cfs_rq->tg->uid; + SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid); + } #else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); #endif - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..cec2224bc9f5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -101,6 +101,8 @@ static int sched_create_user(struct user_struct *up) if (IS_ERR(up->tg)) rc = -ENOMEM; + set_tg_uid(up); + return rc; } -- cgit v1.2.3 From c37bbb0fdcc01610fd55604eb6927210a1d20044 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 3 Dec 2008 13:17:06 -0600 Subject: user namespaces: let user_ns be cloned with fairsched (These two patches are in the next-unacked branch of git://git.kernel.org/pub/scm/linux/kernel/git/sergeh/userns-2.6. If they get some ACKs, then I hope to feed this into security-next. After these two, I think we're ready to tackle userns+capabilities) Fairsched creates a per-uid directory under /sys/kernel/uids/. So when you clone(CLONE_NEWUSER), it tries to create /sys/kernel/uids/0, which already exists, and you get back -ENOMEM. This was supposed to be fixed by sysfs tagging, but that was postponed (ok, rejected until sysfs locking is fixed). So, just as with network namespaces, we just don't create those directories for user namespaces other than the init. Signed-off-by: Serge E. Hallyn Signed-off-by: James Morris --- kernel/user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/user.c b/kernel/user.c index 97202cb29adc..6c924bc48c08 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -246,6 +246,8 @@ static int uids_user_create(struct user_struct *up) int error; memset(kobj, 0, sizeof(struct kobject)); + if (up->user_ns != &init_user_ns) + return 0; kobj->kset = uids_kset; error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid); if (error) { @@ -281,6 +283,8 @@ static void remove_user_sysfs_dir(struct work_struct *w) unsigned long flags; int remove_user = 0; + if (up->user_ns != &init_user_ns) + return; /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() * atomic. */ -- cgit v1.2.3 From 7657d90497f98426af17f0ac633a9b335bb7a8fb Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 3 Dec 2008 13:17:33 -0600 Subject: user namespaces: require cap_set{ug}id for CLONE_NEWUSER While ideally CLONE_NEWUSER will eventually require no privilege, the required permission checks are currently not there. As a result, CLONE_NEWUSER has the same effect as a setuid(0)+setgroups(1,"0"). While we already require CAP_SYS_ADMIN, requiring CAP_SETUID and CAP_SETGID seems appropriate. Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Signed-off-by: James Morris --- kernel/fork.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 1dd89451fae4..e3a85b33107e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1344,7 +1344,8 @@ long do_fork(unsigned long clone_flags, /* hopefully this check will go away when userns support is * complete */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || + !capable(CAP_SETGID)) return -EPERM; } -- cgit v1.2.3 From 5436499e6098759c2340f8b906ea52f993dc4efb Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Sun, 7 Dec 2008 18:47:37 -0800 Subject: sched: fix sd_parent_degenerate on non-numa smp machine Impact: optimize the sched domains tree some more The addition of SD_SERIALIZE flag added to SD_NODE_INIT prevented top level dummy numa sched_domain to be properly degenerated on non-numa smp machine. The reason is that in sd_parent_degenerate(), it found that the child and parent does not have comon sched_domain flags due to SD_SERIALIZE. However, for non-numa smp box, the top level is a dummy with a single sched_group. Filter out SD_SERIALIZE if it is on non-numa machine to properly degenerate top level node sched_domain. this will cut back some of the sd domain walk in the load balancer code. Signed-off-by: Ken Chen Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 152828239ef0..74498c840f93 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6768,6 +6768,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_BALANCE_EXEC | SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES); + if (nr_node_ids == 1) + pflags &= ~SD_SERIALIZE; } if (~cflags & pflags) return 0; -- cgit v1.2.3 From efbe027e95dc13ac343b6130948418d7ead7ddf1 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Mon, 8 Dec 2008 20:52:49 +0530 Subject: sched: idle_balance() does not call load_balance_newidle() Impact: fix SD_BALANCE_NEWIDLEand broaden its use load_balance_newidle() does not get called if SD_BALANCE_NEWIDLE is set at higher level domain (3-CPU) and not in low level domain (2-MC). pulled_task is initialised to -1 and checked for non-zero which is always true if the lowest level sched_domain does not have SD_BALANCE_NEWIDLE flag set. Signed-off-by: Vaidyanathan Srinivasan Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 74498c840f93..bb9c6384d077 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3685,7 +3685,7 @@ out_balanced: static void idle_balance(int this_cpu, struct rq *this_rq) { struct sched_domain *sd; - int pulled_task = -1; + int pulled_task = 0; unsigned long next_balance = jiffies + HZ; cpumask_t tmpmask; -- cgit v1.2.3 From 94d6a5f7341ebaff53d4e41cc81fab37f0d9fbed Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 8 Dec 2008 15:52:21 -0600 Subject: user namespaces: document CFS behavior Documented the currently bogus state of support for CFS user groups with user namespaces. In particular, all users in a user namespace should be children of the user which created the user namespace. This is yet to be implemented. Signed-off-by: Serge E. Hallyn Acked-by: Dhaval Giani Signed-off-by: Serge E. Hallyn Signed-off-by: James Morris --- Documentation/scheduler/sched-design-CFS.txt | 21 +++++++++++++++++++++ kernel/user.c | 8 +++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index eb471c7a905e..8398ca4ff4ed 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -273,3 +273,24 @@ task groups and modify their CPU share using the "cgroups" pseudo filesystem. # #Launch gmplayer (or your favourite movie player) # echo > multimedia/tasks + +8. Implementation note: user namespaces + +User namespaces are intended to be hierarchical. But they are currently +only partially implemented. Each of those has ramifications for CFS. + +First, since user namespaces are hierarchical, the /sys/kernel/uids +presentation is inadequate. Eventually we will likely want to use sysfs +tagging to provide private views of /sys/kernel/uids within each user +namespace. + +Second, the hierarchical nature is intended to support completely +unprivileged use of user namespaces. So if using user groups, then +we want the users in a user namespace to be children of the user +who created it. + +That is currently unimplemented. So instead, every user in a new +user namespace will receive 1024 shares just like any user in the +initial user namespace. Note that at the moment creation of a new +user namespace requires each of CAP_SYS_ADMIN, CAP_SETUID, and +CAP_SETGID. diff --git a/kernel/user.c b/kernel/user.c index 6c924bc48c08..6608a3d8ca61 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -239,7 +239,13 @@ static struct kobj_type uids_ktype = { .release = uids_release, }; -/* create /sys/kernel/uids//cpu_share file for this user */ +/* + * Create /sys/kernel/uids//cpu_share file for this user + * We do not create this file for users in a user namespace (until + * sysfs tagging is implemented). + * + * See Documentation/scheduler/sched-design-CFS.txt for ramifications. + */ static int uids_user_create(struct user_struct *up) { struct kobject *kobj = &up->kobj; -- cgit v1.2.3 From ee79d1bdb6a10499e53f80b1e8d14110215178ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Dec 2008 18:49:50 +0100 Subject: sched: let arch_update_cpu_topology indicate if topology changed Change arch_update_cpu_topology so it returns 1 if the cpu topology changed and 0 if it didn't change. This will be useful for the next patch which adds a call to this function in partition_sched_domains. Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- arch/s390/kernel/topology.c | 5 +++-- include/linux/topology.h | 2 +- kernel/sched.c | 8 +++++++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a947899dcba1..bf96f1b5c6ec 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -212,7 +212,7 @@ static void update_cpu_core_map(void) cpu_core_map[cpu] = cpu_coregroup_map(cpu); } -void arch_update_cpu_topology(void) +int arch_update_cpu_topology(void) { struct tl_info *info = tl_info; struct sys_device *sysdev; @@ -221,7 +221,7 @@ void arch_update_cpu_topology(void) if (!machine_has_topology) { update_cpu_core_map(); topology_update_polarization_simple(); - return; + return 0; } stsi(info, 15, 1, 2); tl_to_cores(info); @@ -230,6 +230,7 @@ void arch_update_cpu_topology(void) sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } + return 1; } static void topology_work_fn(struct work_struct *work) diff --git a/include/linux/topology.h b/include/linux/topology.h index 117f1b7405cf..0c5b5ac36d8e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -49,7 +49,7 @@ for_each_online_node(node) \ if (nr_cpus_node(node)) -void arch_update_cpu_topology(void); +int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 diff --git a/kernel/sched.c b/kernel/sched.c index ef212da928e8..fcfbbd9dbd60 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7675,8 +7675,14 @@ static struct sched_domain_attr *dattr_cur; */ static cpumask_t fallback_doms; -void __attribute__((weak)) arch_update_cpu_topology(void) +/* + * arch_update_cpu_topology lets virtualized architectures update the + * cpu core maps. It is supposed to return 1 if the topology changed + * or 0 if it stayed the same. + */ +int __attribute__((weak)) arch_update_cpu_topology(void) { + return 0; } /* -- cgit v1.2.3 From d65bd5ecb2bd166cea4952a59b7e16cc3ad6ef6c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Dec 2008 18:49:51 +0100 Subject: sched: add missing arch_update_cpu_topology() call arch_reinit_sched_domains() used to call arch_update_cpu_topology() via arch_init_sched_domains(). This call got lost with e761b7725234276a802322549cee5255305a0930 ("cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment (take 2)". So we might end up with outdated and missing cpus in the cpu core maps (architecture used to call arch_reinit_sched_domains if cpu topology changed). This adds a call to arch_update_cpu_topology in partition_sched_domains which gets called whenever scheduling domains get updated. Which is what is supposed to happen when cpu topology changes. Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index fcfbbd9dbd60..ad7b93be5691 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7774,17 +7774,21 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; + int new_topology; mutex_lock(&sched_domains_mutex); /* always unregister in case we don't destroy any domains */ unregister_sched_domain_sysctl(); + /* Let architecture update cpu core mappings. */ + new_topology = arch_update_cpu_topology(); + n = doms_new ? ndoms_new : 0; /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { - for (j = 0; j < n; j++) { + for (j = 0; j < n && !new_topology; j++) { if (cpus_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; @@ -7804,7 +7808,7 @@ match1: /* Build new domains */ for (i = 0; i < ndoms_new; i++) { - for (j = 0; j < ndoms_cur; j++) { + for (j = 0; j < ndoms_cur && !new_topology; j++) { if (cpus_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; -- cgit v1.2.3 From 03e89e4574a680af15f59329b061f35d9813aff4 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 16 Dec 2008 08:45:30 +0100 Subject: sched: fix wakeup preemption clock Impact: sharpen the wakeup-granularity to always be against current scheduler time It was possible to do the preemption check against an old time stamp. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- kernel/sched_fair.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index ad7b93be5691..88215066efae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2266,6 +2266,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) smp_wmb(); rq = task_rq_lock(p, &flags); + update_rq_clock(rq); old_state = p->state; if (!(old_state & state)) goto out; @@ -2323,7 +2324,6 @@ out_activate: schedstat_inc(p, se.nr_wakeups_local); else schedstat_inc(p, se.nr_wakeups_remote); - update_rq_clock(rq); activate_task(rq, p, 1); success = 1; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 98345e45b059..928cd74cff0d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1345,12 +1345,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; struct sched_entity *se = &curr->se, *pse = &p->se; + struct cfs_rq *cfs_rq = task_cfs_rq(curr); - if (unlikely(rt_prio(p->prio))) { - struct cfs_rq *cfs_rq = task_cfs_rq(curr); + update_curr(cfs_rq); - update_rq_clock(rq); - update_curr(cfs_rq); + if (unlikely(rt_prio(p->prio))) { resched_task(curr); return; } -- cgit v1.2.3 From 34f28ecd0f4bdc733c681294d02d9fab5880591b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Dec 2008 08:45:31 +0100 Subject: sched: optimize update_curr() Impact: micro-optimization Skip the hard work when there is none. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 928cd74cff0d..5ad4440f0fc4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -492,6 +492,8 @@ static void update_curr(struct cfs_rq *cfs_rq) * overflow on 32 bits): */ delta_exec = (unsigned long)(now - curr->exec_start); + if (!delta_exec) + return; __update_curr(cfs_rq, curr, delta_exec); curr->exec_start = now; -- cgit v1.2.3 From 720f54988e17b33f3f477010b3a68ee872d20d5a Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Mon, 15 Dec 2008 22:02:01 -0800 Subject: sched, cpuacct: refactoring cpuusage_read / cpuusage_write Impact: micro-optimize the code on 64-bit architectures In the thread regarding to 'export percpu cpuacct cgroup stats' http://lkml.org/lkml/2008/12/7/13 akpm pointed out that current cpuacct code is inefficient. This patch refactoring the following: * make cpu_rq locking only on 32-bit * change iterator to each_present_cpu instead of each_possible_cpu to make it hotplug friendly. It's a bit of code churn, but I was rewarded with 160 byte code size saving on x86-64 arch and zero code size change on i386. Signed-off-by: Ken Chen Cc: Paul Menage Cc: Li Zefan Signed-off-by: Ingo Molnar --- kernel/sched.c | 56 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 88215066efae..41b7e2d524d6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9275,6 +9275,41 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) kfree(ca); } +static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) +{ + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); + u64 data; + +#ifndef CONFIG_64BIT + /* + * Take rq->lock to make 64-bit read safe on 32-bit platforms. + */ + spin_lock_irq(&cpu_rq(cpu)->lock); + data = *cpuusage; + spin_unlock_irq(&cpu_rq(cpu)->lock); +#else + data = *cpuusage; +#endif + + return data; +} + +static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) +{ + u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); + +#ifndef CONFIG_64BIT + /* + * Take rq->lock to make 64-bit write safe on 32-bit platforms. + */ + spin_lock_irq(&cpu_rq(cpu)->lock); + *cpuusage = val; + spin_unlock_irq(&cpu_rq(cpu)->lock); +#else + *cpuusage = val; +#endif +} + /* return total cpu usage (in nanoseconds) of a group */ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) { @@ -9282,17 +9317,8 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) u64 totalcpuusage = 0; int i; - for_each_possible_cpu(i) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, i); - - /* - * Take rq->lock to make 64-bit addition safe on 32-bit - * platforms. - */ - spin_lock_irq(&cpu_rq(i)->lock); - totalcpuusage += *cpuusage; - spin_unlock_irq(&cpu_rq(i)->lock); - } + for_each_present_cpu(i) + totalcpuusage += cpuacct_cpuusage_read(ca, i); return totalcpuusage; } @@ -9309,13 +9335,9 @@ static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, goto out; } - for_each_possible_cpu(i) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, i); + for_each_present_cpu(i) + cpuacct_cpuusage_write(ca, i, 0); - spin_lock_irq(&cpu_rq(i)->lock); - *cpuusage = 0; - spin_unlock_irq(&cpu_rq(i)->lock); - } out: return err; } -- cgit v1.2.3 From e9515c3c9feecd74174c2998add0db51e02abb8d Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Mon, 15 Dec 2008 22:04:15 -0800 Subject: sched, cpuacct: export percpu cpuacct cgroup stats This patch export per-cpu CPU cycle usage for a given cpuacct cgroup. There is a need for a user space monitor daemon to track group CPU usage on per-cpu base. It is also useful for monitoring CFS load balancer behavior by tracking per CPU group usage. Signed-off-by: Ken Chen Reviewed-by: Li Zefan Reviewed-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/sched.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 41b7e2d524d6..f53e2b8ef521 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9342,12 +9342,32 @@ out: return err; } +static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, + struct seq_file *m) +{ + struct cpuacct *ca = cgroup_ca(cgroup); + u64 percpu; + int i; + + for_each_present_cpu(i) { + percpu = cpuacct_cpuusage_read(ca, i); + seq_printf(m, "%llu ", (unsigned long long) percpu); + } + seq_printf(m, "\n"); + return 0; +} + static struct cftype files[] = { { .name = "usage", .read_u64 = cpuusage_read, .write_u64 = cpuusage_write, }, + { + .name = "usage_percpu", + .read_seq_string = cpuacct_percpu_seq_read, + }, + }; static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) -- cgit v1.2.3 From 80f40ee4a07530cc3acbc239a9299ec47025825b Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 15 Dec 2008 11:56:48 +0530 Subject: sched: use RCU variant of list traversal in for_each_leaf_rt_rq() Impact: fix potential of rare crash for_each_leaf_rt_rq() walks an RCU protected list (rq->leaf_rt_rq_list), but doesn't use list_for_each_entry_rcu(). Fix this. Signed-off-by: Bharata B Rao Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d9ba9d5f99d6..7bdf84c85ccd 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -77,7 +77,7 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) } #define for_each_leaf_rt_rq(rt_rq, rq) \ - list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) + list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) { -- cgit v1.2.3 From 9c2c48020ec0dd6ecd27e5a1298f73b40d85a595 Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Tue, 16 Dec 2008 23:41:22 -0800 Subject: schedstat: consolidate per-task cpu runtime stats Impact: simplify code When we turn on CONFIG_SCHEDSTATS, per-task cpu runtime is accumulated twice. Once in task->se.sum_exec_runtime and once in sched_info.cpu_time. These two stats are exactly the same. Given that task->se.sum_exec_runtime is always accumulated by the core scheduler, sched_info can reuse that data instead of duplicate the accounting. Signed-off-by: Ken Chen Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- fs/proc/base.c | 2 +- include/linux/sched.h | 3 +-- kernel/delayacct.c | 2 +- kernel/sched.c | 2 ++ kernel/sched_stats.h | 5 ++--- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/fs/proc/base.c b/fs/proc/base.c index d4677603c889..4d745bac768c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -347,7 +347,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) static int proc_pid_schedstat(struct task_struct *task, char *buffer) { return sprintf(buffer, "%llu %llu %lu\n", - task->sched_info.cpu_time, + task->se.sum_exec_runtime, task->sched_info.run_delay, task->sched_info.pcount); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 8cccd6dc5d66..2d1e840ddd35 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -670,8 +670,7 @@ struct reclaim_state; struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ - unsigned long long cpu_time, /* time spent on the cpu */ - run_delay; /* time spent waiting on a runqueue */ + unsigned long long run_delay; /* time spent waiting on a runqueue */ /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ diff --git a/kernel/delayacct.c b/kernel/delayacct.c index b3179dad71be..abb6e17505e2 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -127,7 +127,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) */ t1 = tsk->sched_info.pcount; t2 = tsk->sched_info.run_delay; - t3 = tsk->sched_info.cpu_time; + t3 = tsk->se.sum_exec_runtime; d->cpu_count += t1; diff --git a/kernel/sched.c b/kernel/sched.c index f53e2b8ef521..fd835fc320b8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -596,6 +596,8 @@ struct rq { #ifdef CONFIG_SCHEDSTATS /* latency stats */ struct sched_info rq_sched_info; + unsigned long long rq_cpu_time; + /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ /* sys_sched_yield() stats */ unsigned int yld_exp_empty; diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 7dbf72a2b02c..3b01098164c8 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -31,7 +31,7 @@ static int show_schedstat(struct seq_file *seq, void *v) rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count, rq->sched_switch, rq->sched_count, rq->sched_goidle, rq->ttwu_count, rq->ttwu_local, - rq->rq_sched_info.cpu_time, + rq->rq_cpu_time, rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); seq_printf(seq, "\n"); @@ -123,7 +123,7 @@ static inline void rq_sched_info_depart(struct rq *rq, unsigned long long delta) { if (rq) - rq->rq_sched_info.cpu_time += delta; + rq->rq_cpu_time += delta; } static inline void @@ -236,7 +236,6 @@ static inline void sched_info_depart(struct task_struct *t) unsigned long long delta = task_rq(t)->clock - t->sched_info.last_arrival; - t->sched_info.cpu_time += delta; rq_sched_info_depart(task_rq(t), delta); if (t->state == TASK_RUNNING) -- cgit v1.2.3 From 9bb482476c6c9d1ae033306440c51ceac93ea80c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Dec 2008 11:30:08 +0000 Subject: allow stripping of generated symbols under CONFIG_KALLSYMS_ALL Building upon parts of the module stripping patch, this patch introduces similar stripping for vmlinux when CONFIG_KALLSYMS_ALL=y. Using CONFIG_KALLSYMS_STRIP_GENERATED reduces the overhead of CONFIG_KALLSYMS_ALL from 245k/310k to 65k/80k for the (i386/x86-64) kernels I tested with. The patch also does away with the need to special case the kallsyms- internal symbols by making them available even in the first linking stage. While it is a generated file, the patch includes the changes to scripts/genksyms/keywords.c_shipped, as I'm unsure what the procedure here is. Signed-off-by: Jan Beulich Signed-off-by: Sam Ravnborg --- Makefile | 47 ++++++--- arch/x86/scripts/strip-symbols | 1 + init/Kconfig | 7 ++ kernel/kallsyms.c | 16 ++- scripts/genksyms/keywords.c_shipped | 189 ++++++++++++++++++------------------ scripts/genksyms/keywords.gperf | 2 + scripts/kallsyms.c | 21 ++-- 7 files changed, 155 insertions(+), 128 deletions(-) create mode 100644 arch/x86/scripts/strip-symbols (limited to 'kernel') diff --git a/Makefile b/Makefile index 5dd0ed3b12c6..b3d1c8f1f4ce 100644 --- a/Makefile +++ b/Makefile @@ -604,6 +604,9 @@ export INSTALL_PATH ?= /boot MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) export MODLIB +strip-symbols := $(srctree)/scripts/strip-symbols \ + $(wildcard $(srctree)/arch/$(ARCH)/scripts/strip-symbols) + # # INSTALL_MOD_STRIP, if defined, will cause modules to be stripped while # they get installed. If INSTALL_MOD_STRIP is '1', then the default @@ -611,8 +614,10 @@ export MODLIB # be used as the option(s) to the objcopy command. ifdef INSTALL_MOD_STRIP ifeq ($(INSTALL_MOD_STRIP),1) -mod_strip_cmd = $(OBJCOPY) --strip-debug --strip-symbols \ - $(srctree)/scripts/strip-symbols --wildcard +mod_strip_cmd = $(OBJCOPY) --strip-debug +ifeq ($(CONFIG_KALLSYMS_ALL),$(CONFIG_KALLSYMS_STRIP_GENERATED)) +mod_strip_cmd += --wildcard $(addprefix --strip-symbols ,$(strip-symbols)) +endif else mod_strip_cmd = $(OBJCOPY) $(INSTALL_MOD_STRIP) endif # INSTALL_MOD_STRIP=1 @@ -747,6 +752,7 @@ last_kallsyms := 2 endif kallsyms.o := .tmp_kallsyms$(last_kallsyms).o +kallsyms.h := $(wildcard include/config/kallsyms/*.h) $(wildcard include/config/kallsyms/*/*.h) define verify_kallsyms $(Q)$(if $($(quiet)cmd_sysmap), \ @@ -771,24 +777,41 @@ endef # Generate .S file with all kernel symbols quiet_cmd_kallsyms = KSYM $@ - cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \ - $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@ + cmd_kallsyms = { test $* -eq 0 || $(NM) -n $<; } \ + | $(KALLSYMS) $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) >$@ -.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE +quiet_cmd_kstrip = STRIP $@ + cmd_kstrip = $(OBJCOPY) --wildcard $(addprefix --strip$(if $(CONFIG_RELOCATABLE),-unneeded)-symbols ,$(filter %/scripts/strip-symbols,$^)) $< $@ + +$(foreach n,0 1 2 3,.tmp_kallsyms$(n).o): KBUILD_AFLAGS += -Wa,--strip-local-absolute +$(foreach n,0 1 2 3,.tmp_kallsyms$(n).o): %.o: %.S scripts FORCE $(call if_changed_dep,as_o_S) -.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS) +ifeq ($(CONFIG_KALLSYMS_STRIP_GENERATED),y) +strip-ext := .stripped +endif + +.tmp_kallsyms%.S: .tmp_vmlinux%$(strip-ext) $(KALLSYMS) $(kallsyms.h) $(call cmd,kallsyms) +# make -jN seems to have problems with intermediate files, see bug #3330. +.SECONDARY: $(foreach n,1 2 3,.tmp_vmlinux$(n).stripped) +.tmp_vmlinux%.stripped: .tmp_vmlinux% $(strip-symbols) $(kallsyms.h) + $(call cmd,kstrip) + +ifneq ($(CONFIG_DEBUG_INFO),y) +.tmp_vmlinux%: LDFLAGS_vmlinux += -S +endif # .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version -.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE - $(call if_changed_rule,ksym_ld) +.tmp_vmlinux%: $(vmlinux-lds) $(vmlinux-all) FORCE + $(if $(filter 1,$*),$(call if_changed_rule,ksym_ld),$(call if_changed,vmlinux__)) -.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE - $(call if_changed,vmlinux__) +.tmp_vmlinux0$(strip-ext): + $(Q)echo "placeholder" >$@ -.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE - $(call if_changed,vmlinux__) +.tmp_vmlinux1: .tmp_kallsyms0.o +.tmp_vmlinux2: .tmp_kallsyms1.o +.tmp_vmlinux3: .tmp_kallsyms2.o # Needs to visit scripts/ before $(KALLSYMS) can be used. $(KALLSYMS): scripts ; diff --git a/arch/x86/scripts/strip-symbols b/arch/x86/scripts/strip-symbols new file mode 100644 index 000000000000..a2f1ccb827c7 --- /dev/null +++ b/arch/x86/scripts/strip-symbols @@ -0,0 +1 @@ +__cpu_vendor_dev_X86_VENDOR_* diff --git a/init/Kconfig b/init/Kconfig index f763762d544a..0f5af409fef1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -588,6 +588,13 @@ config KALLSYMS_ALL Say N. +config KALLSYMS_STRIP_GENERATED + bool "Strip machine generated symbols from kallsyms" + depends on KALLSYMS_ALL + default y + help + Say N if you want kallsyms to retain even machine generated symbols. + config KALLSYMS_EXTRA_PASS bool "Do an extra kallsyms pass" depends on KALLSYMS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 7b8b0f21a5b1..e694afa0eb8c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -30,20 +30,19 @@ #define all_var 0 #endif -/* These will be re-linked against their real values during the second link stage */ -extern const unsigned long kallsyms_addresses[] __attribute__((weak)); -extern const u8 kallsyms_names[] __attribute__((weak)); +extern const unsigned long kallsyms_addresses[]; +extern const u8 kallsyms_names[]; /* tell the compiler that the count isn't in the small data section if the arch * has one (eg: FRV) */ extern const unsigned long kallsyms_num_syms -__attribute__((weak, section(".rodata"))); + __attribute__((__section__(".rodata"))); -extern const u8 kallsyms_token_table[] __attribute__((weak)); -extern const u16 kallsyms_token_index[] __attribute__((weak)); +extern const u8 kallsyms_token_table[]; +extern const u16 kallsyms_token_index[]; -extern const unsigned long kallsyms_markers[] __attribute__((weak)); +extern const unsigned long kallsyms_markers[]; static inline int is_kernel_inittext(unsigned long addr) { @@ -168,9 +167,6 @@ static unsigned long get_symbol_pos(unsigned long addr, unsigned long symbol_start = 0, symbol_end = 0; unsigned long i, low, high, mid; - /* This kernel should never had been booted. */ - BUG_ON(!kallsyms_addresses); - /* do a binary search on the sorted kallsyms_addresses array */ low = 0; high = kallsyms_num_syms; diff --git a/scripts/genksyms/keywords.c_shipped b/scripts/genksyms/keywords.c_shipped index 971e0113ae7a..83484fe93ede 100644 --- a/scripts/genksyms/keywords.c_shipped +++ b/scripts/genksyms/keywords.c_shipped @@ -1,4 +1,4 @@ -/* ANSI-C code produced by gperf version 3.0.2 */ +/* ANSI-C code produced by gperf version 3.0.1 */ /* Command-line: gperf -L ANSI-C -a -C -E -g -H is_reserved_hash -k '1,3,$' -N is_reserved_word -p -t scripts/genksyms/keywords.gperf */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ @@ -32,7 +32,7 @@ #line 3 "scripts/genksyms/keywords.gperf" struct resword { const char *name; int token; }; -/* maximum key range = 62, duplicates = 0 */ +/* maximum key range = 64, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -46,32 +46,32 @@ is_reserved_hash (register const char *str, register unsigned int len) { static const unsigned char asso_values[] = { - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 5, - 65, 65, 65, 65, 65, 65, 35, 65, 65, 65, - 0, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 0, 65, 0, 65, 5, - 20, 15, 10, 30, 65, 15, 65, 65, 20, 0, - 10, 35, 20, 65, 10, 5, 0, 10, 5, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65 + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 0, + 67, 67, 67, 67, 67, 67, 15, 67, 67, 67, + 0, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 0, 67, 0, 67, 5, + 25, 20, 15, 30, 67, 15, 67, 67, 10, 0, + 10, 40, 20, 67, 10, 5, 0, 10, 15, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67 }; return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]]; } @@ -84,116 +84,119 @@ is_reserved_word (register const char *str, register unsigned int len) { enum { - TOTAL_KEYWORDS = 43, + TOTAL_KEYWORDS = 45, MIN_WORD_LENGTH = 3, MAX_WORD_LENGTH = 24, MIN_HASH_VALUE = 3, - MAX_HASH_VALUE = 64 + MAX_HASH_VALUE = 66 }; static const struct resword wordlist[] = { {""}, {""}, {""}, -#line 26 "scripts/genksyms/keywords.gperf" +#line 28 "scripts/genksyms/keywords.gperf" {"asm", ASM_KEYW}, {""}, -#line 8 "scripts/genksyms/keywords.gperf" +#line 10 "scripts/genksyms/keywords.gperf" {"__asm", ASM_KEYW}, {""}, -#line 9 "scripts/genksyms/keywords.gperf" +#line 11 "scripts/genksyms/keywords.gperf" {"__asm__", ASM_KEYW}, {""}, {""}, -#line 52 "scripts/genksyms/keywords.gperf" +#line 54 "scripts/genksyms/keywords.gperf" {"__typeof__", TYPEOF_KEYW}, {""}, -#line 12 "scripts/genksyms/keywords.gperf" +#line 14 "scripts/genksyms/keywords.gperf" {"__const", CONST_KEYW}, -#line 11 "scripts/genksyms/keywords.gperf" - {"__attribute__", ATTRIBUTE_KEYW}, #line 13 "scripts/genksyms/keywords.gperf" + {"__attribute__", ATTRIBUTE_KEYW}, +#line 15 "scripts/genksyms/keywords.gperf" {"__const__", CONST_KEYW}, -#line 18 "scripts/genksyms/keywords.gperf" +#line 20 "scripts/genksyms/keywords.gperf" {"__signed__", SIGNED_KEYW}, -#line 44 "scripts/genksyms/keywords.gperf" +#line 46 "scripts/genksyms/keywords.gperf" {"static", STATIC_KEYW}, -#line 20 "scripts/genksyms/keywords.gperf" - {"__volatile__", VOLATILE_KEYW}, -#line 39 "scripts/genksyms/keywords.gperf" + {""}, +#line 41 "scripts/genksyms/keywords.gperf" {"int", INT_KEYW}, -#line 32 "scripts/genksyms/keywords.gperf" +#line 34 "scripts/genksyms/keywords.gperf" {"char", CHAR_KEYW}, -#line 33 "scripts/genksyms/keywords.gperf" +#line 35 "scripts/genksyms/keywords.gperf" {"const", CONST_KEYW}, -#line 45 "scripts/genksyms/keywords.gperf" +#line 47 "scripts/genksyms/keywords.gperf" {"struct", STRUCT_KEYW}, -#line 24 "scripts/genksyms/keywords.gperf" +#line 26 "scripts/genksyms/keywords.gperf" {"__restrict__", RESTRICT_KEYW}, -#line 25 "scripts/genksyms/keywords.gperf" +#line 27 "scripts/genksyms/keywords.gperf" {"restrict", RESTRICT_KEYW}, -#line 23 "scripts/genksyms/keywords.gperf" - {"_restrict", RESTRICT_KEYW}, -#line 16 "scripts/genksyms/keywords.gperf" +#line 7 "scripts/genksyms/keywords.gperf" + {"EXPORT_SYMBOL_GPL_FUTURE", EXPORT_SYMBOL_KEYW}, +#line 18 "scripts/genksyms/keywords.gperf" {"__inline__", INLINE_KEYW}, -#line 10 "scripts/genksyms/keywords.gperf" - {"__attribute", ATTRIBUTE_KEYW}, {""}, -#line 14 "scripts/genksyms/keywords.gperf" +#line 22 "scripts/genksyms/keywords.gperf" + {"__volatile__", VOLATILE_KEYW}, +#line 5 "scripts/genksyms/keywords.gperf" + {"EXPORT_SYMBOL", EXPORT_SYMBOL_KEYW}, +#line 25 "scripts/genksyms/keywords.gperf" + {"_restrict", RESTRICT_KEYW}, + {""}, +#line 12 "scripts/genksyms/keywords.gperf" + {"__attribute", ATTRIBUTE_KEYW}, +#line 6 "scripts/genksyms/keywords.gperf" + {"EXPORT_SYMBOL_GPL", EXPORT_SYMBOL_KEYW}, +#line 16 "scripts/genksyms/keywords.gperf" {"__extension__", EXTENSION_KEYW}, -#line 35 "scripts/genksyms/keywords.gperf" +#line 37 "scripts/genksyms/keywords.gperf" {"enum", ENUM_KEYW}, -#line 19 "scripts/genksyms/keywords.gperf" - {"__volatile", VOLATILE_KEYW}, -#line 36 "scripts/genksyms/keywords.gperf" +#line 8 "scripts/genksyms/keywords.gperf" + {"EXPORT_UNUSED_SYMBOL", EXPORT_SYMBOL_KEYW}, +#line 38 "scripts/genksyms/keywords.gperf" {"extern", EXTERN_KEYW}, {""}, -#line 17 "scripts/genksyms/keywords.gperf" +#line 19 "scripts/genksyms/keywords.gperf" {"__signed", SIGNED_KEYW}, -#line 7 "scripts/genksyms/keywords.gperf" - {"EXPORT_SYMBOL_GPL_FUTURE", EXPORT_SYMBOL_KEYW}, - {""}, -#line 51 "scripts/genksyms/keywords.gperf" +#line 9 "scripts/genksyms/keywords.gperf" + {"EXPORT_UNUSED_SYMBOL_GPL", EXPORT_SYMBOL_KEYW}, +#line 49 "scripts/genksyms/keywords.gperf" + {"union", UNION_KEYW}, +#line 53 "scripts/genksyms/keywords.gperf" {"typeof", TYPEOF_KEYW}, -#line 46 "scripts/genksyms/keywords.gperf" +#line 48 "scripts/genksyms/keywords.gperf" {"typedef", TYPEDEF_KEYW}, -#line 15 "scripts/genksyms/keywords.gperf" +#line 17 "scripts/genksyms/keywords.gperf" {"__inline", INLINE_KEYW}, -#line 31 "scripts/genksyms/keywords.gperf" +#line 33 "scripts/genksyms/keywords.gperf" {"auto", AUTO_KEYW}, -#line 47 "scripts/genksyms/keywords.gperf" - {"union", UNION_KEYW}, - {""}, {""}, -#line 48 "scripts/genksyms/keywords.gperf" - {"unsigned", UNSIGNED_KEYW}, -#line 49 "scripts/genksyms/keywords.gperf" - {"void", VOID_KEYW}, -#line 42 "scripts/genksyms/keywords.gperf" - {"short", SHORT_KEYW}, +#line 21 "scripts/genksyms/keywords.gperf" + {"__volatile", VOLATILE_KEYW}, {""}, {""}, #line 50 "scripts/genksyms/keywords.gperf" - {"volatile", VOLATILE_KEYW}, - {""}, -#line 37 "scripts/genksyms/keywords.gperf" - {"float", FLOAT_KEYW}, -#line 34 "scripts/genksyms/keywords.gperf" - {"double", DOUBLE_KEYW}, + {"unsigned", UNSIGNED_KEYW}, {""}, -#line 5 "scripts/genksyms/keywords.gperf" - {"EXPORT_SYMBOL", EXPORT_SYMBOL_KEYW}, - {""}, {""}, -#line 38 "scripts/genksyms/keywords.gperf" +#line 44 "scripts/genksyms/keywords.gperf" + {"short", SHORT_KEYW}, +#line 40 "scripts/genksyms/keywords.gperf" {"inline", INLINE_KEYW}, -#line 6 "scripts/genksyms/keywords.gperf" - {"EXPORT_SYMBOL_GPL", EXPORT_SYMBOL_KEYW}, -#line 41 "scripts/genksyms/keywords.gperf" - {"register", REGISTER_KEYW}, {""}, -#line 22 "scripts/genksyms/keywords.gperf" +#line 52 "scripts/genksyms/keywords.gperf" + {"volatile", VOLATILE_KEYW}, +#line 42 "scripts/genksyms/keywords.gperf" + {"long", LONG_KEYW}, +#line 24 "scripts/genksyms/keywords.gperf" {"_Bool", BOOL_KEYW}, -#line 43 "scripts/genksyms/keywords.gperf" - {"signed", SIGNED_KEYW}, {""}, {""}, -#line 40 "scripts/genksyms/keywords.gperf" - {"long", LONG_KEYW} +#line 43 "scripts/genksyms/keywords.gperf" + {"register", REGISTER_KEYW}, +#line 51 "scripts/genksyms/keywords.gperf" + {"void", VOID_KEYW}, +#line 39 "scripts/genksyms/keywords.gperf" + {"float", FLOAT_KEYW}, +#line 36 "scripts/genksyms/keywords.gperf" + {"double", DOUBLE_KEYW}, + {""}, {""}, {""}, {""}, +#line 45 "scripts/genksyms/keywords.gperf" + {"signed", SIGNED_KEYW} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) diff --git a/scripts/genksyms/keywords.gperf b/scripts/genksyms/keywords.gperf index 5ef3733225fb..8abe7ab8d88f 100644 --- a/scripts/genksyms/keywords.gperf +++ b/scripts/genksyms/keywords.gperf @@ -5,6 +5,8 @@ struct resword { const char *name; int token; } EXPORT_SYMBOL, EXPORT_SYMBOL_KEYW EXPORT_SYMBOL_GPL, EXPORT_SYMBOL_KEYW EXPORT_SYMBOL_GPL_FUTURE, EXPORT_SYMBOL_KEYW +EXPORT_UNUSED_SYMBOL, EXPORT_SYMBOL_KEYW +EXPORT_UNUSED_SYMBOL_GPL, EXPORT_SYMBOL_KEYW __asm, ASM_KEYW __asm__, ASM_KEYW __attribute, ATTRIBUTE_KEYW diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index ad2434b26970..92758120a767 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -130,18 +130,9 @@ static int read_symbol(FILE *in, struct sym_entry *s) static int symbol_valid(struct sym_entry *s) { /* Symbols which vary between passes. Passes 1 and 2 must have - * identical symbol lists. The kallsyms_* symbols below are only added - * after pass 1, they would be included in pass 2 when --all-symbols is - * specified so exclude them to get a stable symbol list. + * identical symbol lists. */ static char *special_symbols[] = { - "kallsyms_addresses", - "kallsyms_num_syms", - "kallsyms_names", - "kallsyms_markers", - "kallsyms_token_table", - "kallsyms_token_index", - /* Exclude linker generated symbols which vary between passes */ "_SDA_BASE_", /* ppc */ "_SDA2_BASE_", /* ppc */ @@ -173,7 +164,9 @@ static int symbol_valid(struct sym_entry *s) } /* Exclude symbols which vary between passes. */ - if (strstr((char *)s->sym + offset, "_compiled.")) + if (strstr((char *)s->sym + offset, "_compiled.") || + strncmp((char*)s->sym + offset, "__compound_literal.", 19) == 0 || + strncmp((char*)s->sym + offset, "__compound_literal$", 19) == 0) return 0; for (i = 0; special_symbols[i]; i++) @@ -550,8 +543,10 @@ int main(int argc, char **argv) usage(); read_map(stdin); - sort_symbols(); - optimize_token_table(); + if (table_cnt) { + sort_symbols(); + optimize_token_table(); + } write_src(); return 0; -- cgit v1.2.3 From 412d0bb553c0227191f1bfd06100f561600bff22 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 24 Dec 2008 01:43:25 +0100 Subject: tracing/function-graph-tracer: strip ending newlines on comments Impact: tracer output improvement Ending newlines are appended automatically on comments by the function graph tracer because the newline needs to be placed after the "*/" comment characters. So if the user puts an ending newline, we want to strip it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions_graph.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4bf39fcae97a..bc7d90850be5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -592,6 +592,12 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s, if (ent->flags & TRACE_FLAG_CONT) trace_seq_print_cont(s, iter); + /* Strip ending newline */ + if (s->buffer[s->len - 1] == '\n') { + s->buffer[s->len - 1] = '\0'; + s->len--; + } + ret = trace_seq_printf(s, " */\n"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; -- cgit v1.2.3