diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 21:06:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 21:06:18 -0700 |
commit | cdab10bf3285ee354e8f50254aa799631b7a95e0 (patch) | |
tree | e0b622a649d301346132b4e5ae91966856449fad | |
parent | 6fedc28076bbbb32edb722e80f9406a3d1d668a8 (diff) | |
parent | 15bf32398ad488c0df1cbaf16431422c87e4feea (diff) | |
download | lwn-cdab10bf3285ee354e8f50254aa799631b7a95e0.tar.gz lwn-cdab10bf3285ee354e8f50254aa799631b7a95e0.zip |
Merge tag 'selinux-pr-20211101' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux
Pull selinux updates from Paul Moore:
- Add LSM/SELinux/Smack controls and auditing for io-uring.
As usual, the individual commit descriptions have more detail, but we
were basically missing two things which we're adding here:
+ establishment of a proper audit context so that auditing of
io-uring ops works similarly to how it does for syscalls (with
some io-uring additions because io-uring ops are *not* syscalls)
+ additional LSM hooks to enable access control points for some of
the more unusual io-uring features, e.g. credential overrides.
The additional audit callouts and LSM hooks were done in conjunction
with the io-uring folks, based on conversations and RFC patches
earlier in the year.
- Fixup the binder credential handling so that the proper credentials
are used in the LSM hooks; the commit description and the code
comment which is removed in these patches are helpful to understand
the background and why this is the proper fix.
- Enable SELinux genfscon policy support for securityfs, allowing
improved SELinux filesystem labeling for other subsystems which make
use of securityfs, e.g. IMA.
* tag 'selinux-pr-20211101' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux:
security: Return xattr name from security_dentry_init_security()
selinux: fix a sock regression in selinux_ip_postroute_compat()
binder: use cred instead of task for getsecid
binder: use cred instead of task for selinux checks
binder: use euid from cred instead of using task
LSM: Avoid warnings about potentially unused hook variables
selinux: fix all of the W=1 build warnings
selinux: make better use of the nf_hook_state passed to the NF hooks
selinux: fix race condition when computing ocontext SIDs
selinux: remove unneeded ipv6 hook wrappers
selinux: remove the SELinux lockdown implementation
selinux: enable genfscon labeling for securityfs
Smack: Brutalist io_uring support
selinux: add support for the io_uring access controls
lsm,io_uring: add LSM hooks to io_uring
io_uring: convert io_uring to the secure anon inode interface
fs: add anon_inode_getfile_secure() similar to anon_inode_getfd_secure()
audit: add filtering for io_uring records
audit,io_uring,io-wq: add some basic audit support to io_uring
audit: prepare audit_context for use in calling contexts beyond syscalls
-rw-r--r-- | drivers/android/binder.c | 27 | ||||
-rw-r--r-- | drivers/android/binder_internal.h | 4 | ||||
-rw-r--r-- | fs/anon_inodes.c | 29 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 3 | ||||
-rw-r--r-- | fs/io-wq.c | 4 | ||||
-rw-r--r-- | fs/io_uring.c | 71 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 3 | ||||
-rw-r--r-- | include/linux/anon_inodes.h | 4 | ||||
-rw-r--r-- | include/linux/audit.h | 26 | ||||
-rw-r--r-- | include/linux/lsm_hook_defs.h | 22 | ||||
-rw-r--r-- | include/linux/lsm_hooks.h | 30 | ||||
-rw-r--r-- | include/linux/security.h | 55 | ||||
-rw-r--r-- | include/uapi/linux/audit.h | 4 | ||||
-rw-r--r-- | kernel/audit.h | 7 | ||||
-rw-r--r-- | kernel/audit_tree.c | 3 | ||||
-rw-r--r-- | kernel/audit_watch.c | 3 | ||||
-rw-r--r-- | kernel/auditfilter.c | 15 | ||||
-rw-r--r-- | kernel/auditsc.c | 468 | ||||
-rw-r--r-- | security/security.c | 35 | ||||
-rw-r--r-- | security/selinux/avc.c | 13 | ||||
-rw-r--r-- | security/selinux/hooks.c | 239 | ||||
-rw-r--r-- | security/selinux/include/classmap.h | 4 | ||||
-rw-r--r-- | security/selinux/netlabel.c | 7 | ||||
-rw-r--r-- | security/selinux/netport.c | 2 | ||||
-rw-r--r-- | security/selinux/ss/hashtab.c | 1 | ||||
-rw-r--r-- | security/selinux/ss/mls.c | 4 | ||||
-rw-r--r-- | security/selinux/ss/services.c | 176 | ||||
-rw-r--r-- | security/smack/smack_lsm.c | 46 |
28 files changed, 884 insertions, 421 deletions
diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9edacc8b9768..26382e982c5e 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2056,7 +2056,7 @@ static int binder_translate_binder(struct flat_binder_object *fp, ret = -EINVAL; goto done; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2102,7 +2102,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, proc->pid, thread->pid, fp->handle); return -EINVAL; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2190,7 +2190,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, ret = -EBADF; goto err_fget; } - ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file); + ret = security_binder_transfer_file(proc->cred, target_proc->cred, file); if (ret < 0) { ret = -EPERM; goto err_security; @@ -2595,8 +2595,8 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_invalid_target_handle; } - if (security_binder_transaction(proc->tsk, - target_proc->tsk) < 0) { + if (security_binder_transaction(proc->cred, + target_proc->cred) < 0) { return_error = BR_FAILED_REPLY; return_error_param = -EPERM; return_error_line = __LINE__; @@ -2711,7 +2711,7 @@ static void binder_transaction(struct binder_proc *proc, t->from = thread; else t->from = NULL; - t->sender_euid = task_euid(proc->tsk); + t->sender_euid = proc->cred->euid; t->to_proc = target_proc; t->to_thread = target_thread; t->code = tr->code; @@ -2722,16 +2722,7 @@ static void binder_transaction(struct binder_proc *proc, u32 secid; size_t added_size; - /* - * Arguably this should be the task's subjective LSM secid but - * we can't reliably access the subjective creds of a task - * other than our own so we must use the objective creds, which - * are safe to access. The downside is that if a task is - * temporarily overriding it's creds it will not be reflected - * here; however, it isn't clear that binder would handle that - * case well anyway. - */ - security_task_getsecid_obj(proc->tsk, &secid); + security_cred_getsecid(proc->cred, &secid); ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); if (ret) { return_error = BR_FAILED_REPLY; @@ -4353,6 +4344,7 @@ static void binder_free_proc(struct binder_proc *proc) } binder_alloc_deferred_release(&proc->alloc); put_task_struct(proc->tsk); + put_cred(proc->cred); binder_stats_deleted(BINDER_STAT_PROC); kfree(proc); } @@ -4564,7 +4556,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp, ret = -EBUSY; goto out; } - ret = security_binder_set_context_mgr(proc->tsk); + ret = security_binder_set_context_mgr(proc->cred); if (ret < 0) goto out; if (uid_valid(context->binder_context_mgr_uid)) { @@ -5055,6 +5047,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + proc->cred = get_cred(filp->f_cred); INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->freeze_wait); proc->default_priority = task_nice(current); diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 402c4d4362a8..d6b6b8cb7346 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -364,6 +364,9 @@ struct binder_ref { * (invariant after initialized) * @tsk task_struct for group_leader of process * (invariant after initialized) + * @cred struct cred associated with the `struct file` + * in binder_open() + * (invariant after initialized) * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -426,6 +429,7 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; + const struct cred *cred; struct hlist_node deferred_work_node; int deferred_work; int outstanding_txns; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index a280156138ed..e0c3e33c4177 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -148,6 +148,35 @@ struct file *anon_inode_getfile(const char *name, } EXPORT_SYMBOL_GPL(anon_inode_getfile); +/** + * anon_inode_getfile_secure - Like anon_inode_getfile(), but creates a new + * !S_PRIVATE anon inode rather than reuse the + * singleton anon inode and calls the + * inode_init_security_anon() LSM hook. This + * allows for both the inode to have its own + * security context and for the LSM to enforce + * policy on the inode's creation. + * + * @name: [in] name of the "class" of the new file + * @fops: [in] file operations for the new file + * @priv: [in] private data for the new file (will be file's private_data) + * @flags: [in] flags + * @context_inode: + * [in] the logical relationship with the new inode (optional) + * + * The LSM may use @context_inode in inode_init_security_anon(), but a + * reference to it is not held. Returns the newly created file* or an error + * pointer. See the anon_inode_getfile() documentation for more information. + */ +struct file *anon_inode_getfile_secure(const char *name, + const struct file_operations *fops, + void *priv, int flags, + const struct inode *context_inode) +{ + return __anon_inode_getfile(name, fops, priv, flags, + context_inode, true); +} + static int __anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 159a1ffa4f4b..fcf7dfdecf96 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1311,7 +1311,7 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, int err; err = security_dentry_init_security(dentry, mode, &dentry->d_name, - &as_ctx->sec_ctx, + &name, &as_ctx->sec_ctx, &as_ctx->sec_ctxlen); if (err < 0) { WARN_ON_ONCE(err != -EOPNOTSUPP); @@ -1335,7 +1335,6 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, * It only supports single security module and only selinux has * dentry_init_security hook. */ - name = XATTR_NAME_SELINUX; name_len = strlen(name); err = ceph_pagelist_reserve(pagelist, 4 * 2 + name_len + as_ctx->sec_ctxlen); diff --git a/fs/io-wq.c b/fs/io-wq.c index 38b33ad9e8cf..c51691262208 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -14,6 +14,7 @@ #include <linux/rculist_nulls.h> #include <linux/cpu.h> #include <linux/tracehook.h> +#include <linux/audit.h> #include <uapi/linux/io_uring.h> #include "io-wq.h" @@ -593,6 +594,8 @@ static int io_wqe_worker(void *data) snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); set_task_comm(current, buf); + audit_alloc_kernel(current); + while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { long ret; @@ -631,6 +634,7 @@ loop: io_worker_handle_work(worker); } + audit_free(current); io_worker_exit(worker); return 0; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 3a4af9799d9a..3ecd4b51510e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -79,6 +79,8 @@ #include <linux/pagemap.h> #include <linux/io_uring.h> #include <linux/tracehook.h> +#include <linux/audit.h> +#include <linux/security.h> #define CREATE_TRACE_POINTS #include <trace/events/io_uring.h> @@ -912,6 +914,8 @@ struct io_op_def { unsigned needs_async_setup : 1; /* opcode is not supported by this kernel */ unsigned not_supported : 1; + /* skip auditing */ + unsigned audit_skip : 1; /* size of async data needed, if any */ unsigned short async_size; }; @@ -925,6 +929,7 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, .needs_async_setup = 1, .plug = 1, + .audit_skip = 1, .async_size = sizeof(struct io_async_rw), }, [IORING_OP_WRITEV] = { @@ -934,16 +939,19 @@ static const struct io_op_def io_op_defs[] = { .pollout = 1, .needs_async_setup = 1, .plug = 1, + .audit_skip = 1, .async_size = sizeof(struct io_async_rw), }, [IORING_OP_FSYNC] = { .needs_file = 1, + .audit_skip = 1, }, [IORING_OP_READ_FIXED] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .plug = 1, + .audit_skip = 1, .async_size = sizeof(struct io_async_rw), }, [IORING_OP_WRITE_FIXED] = { @@ -952,15 +960,20 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, + .audit_skip = 1, .async_size = sizeof(struct io_async_rw), }, [IORING_OP_POLL_ADD] = { .needs_file = 1, .unbound_nonreg_file = 1, + .audit_skip = 1, + }, + [IORING_OP_POLL_REMOVE] = { + .audit_skip = 1, }, - [IORING_OP_POLL_REMOVE] = {}, [IORING_OP_SYNC_FILE_RANGE] = { .needs_file = 1, + .audit_skip = 1, }, [IORING_OP_SENDMSG] = { .needs_file = 1, @@ -978,18 +991,23 @@ static const struct io_op_def io_op_defs[] = { .async_size = sizeof(struct io_async_msghdr), }, [IORING_OP_TIMEOUT] = { + .audit_skip = 1, .async_size = sizeof(struct io_timeout_data), }, [IORING_OP_TIMEOUT_REMOVE] = { /* used by timeout updates' prep() */ + .audit_skip = 1, }, [IORING_OP_ACCEPT] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, }, - [IORING_OP_ASYNC_CANCEL] = {}, + [IORING_OP_ASYNC_CANCEL] = { + .audit_skip = 1, + }, [IORING_OP_LINK_TIMEOUT] = { + .audit_skip = 1, .async_size = sizeof(struct io_timeout_data), }, [IORING_OP_CONNECT] = { @@ -1004,14 +1022,19 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_OPENAT] = {}, [IORING_OP_CLOSE] = {}, - [IORING_OP_FILES_UPDATE] = {}, - [IORING_OP_STATX] = {}, + [IORING_OP_FILES_UPDATE] = { + .audit_skip = 1, + }, + [IORING_OP_STATX] = { + .audit_skip = 1, + }, [IORING_OP_READ] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .plug = 1, + .audit_skip = 1, .async_size = sizeof(struct io_async_rw), }, [IORING_OP_WRITE] = { @@ -1020,39 +1043,50 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, + .audit_skip = 1, .async_size = sizeof(struct io_async_rw), }, [IORING_OP_FADVISE] = { .needs_file = 1, + .audit_skip = 1, }, [IORING_OP_MADVISE] = {}, [IORING_OP_SEND] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, + .audit_skip = 1, }, [IORING_OP_RECV] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, + .audit_skip = 1, }, [IORING_OP_OPENAT2] = { }, [IORING_OP_EPOLL_CTL] = { .unbound_nonreg_file = 1, + .audit_skip = 1, }, [IORING_OP_SPLICE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, + .audit_skip = 1, + }, + [IORING_OP_PROVIDE_BUFFERS] = { + .audit_skip = 1, + }, + [IORING_OP_REMOVE_BUFFERS] = { + .audit_skip = 1, }, - [IORING_OP_PROVIDE_BUFFERS] = {}, - [IORING_OP_REMOVE_BUFFERS] = {}, [IORING_OP_TEE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, + .audit_skip = 1, }, [IORING_OP_SHUTDOWN] = { .needs_file = 1, @@ -6581,6 +6615,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) creds = override_creds(req->creds); + if (!io_op_defs[req->opcode].audit_skip) + audit_uring_entry(req->opcode); + switch (req->opcode) { case IORING_OP_NOP: ret = io_nop(req, issue_flags); @@ -6696,6 +6733,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) break; } + if (!io_op_defs[req->opcode].audit_skip) + audit_uring_exit(!ret, ret); + if (creds) revert_creds(creds); if (ret) @@ -7090,10 +7130,17 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, personality = READ_ONCE(sqe->personality); if (personality) { + int ret; + req->creds = xa_load(&ctx->personalities, personality); if (!req->creds) return -EINVAL; get_cred(req->creds); + ret = security_uring_override_creds(req->creds); + if (ret) { + put_cred(req->creds); + return ret; + } req->flags |= REQ_F_CREDS; } @@ -7400,6 +7447,8 @@ static int io_sq_thread(void *data) set_cpus_allowed_ptr(current, cpu_online_mask); current->flags |= PF_NO_SETAFFINITY; + audit_alloc_kernel(current); + mutex_lock(&sqd->lock); while (1) { bool cap_entries, sqt_spin = false; @@ -7465,6 +7514,8 @@ static int io_sq_thread(void *data) io_run_task_work(); mutex_unlock(&sqd->lock); + audit_free(current); + complete(&sqd->exited); do_exit(0); } @@ -8622,6 +8673,10 @@ static __cold int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_sq_data *sqd; bool attached; + ret = security_uring_sqpoll(); + if (ret) + return ret; + sqd = io_get_sq_data(p, &attached); if (IS_ERR(sqd)) { ret = PTR_ERR(sqd); @@ -10276,8 +10331,8 @@ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) return ERR_PTR(ret); #endif - file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, - O_RDWR | O_CLOEXEC); + file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + O_RDWR | O_CLOEXEC, NULL); #if defined(CONFIG_UNIX) if (IS_ERR(file)) { sock_release(ctx->ring_sock); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e1214bb6b7ee..459860aa8fd7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -127,7 +127,8 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, return NULL; err = security_dentry_init_security(dentry, sattr->ia_mode, - &dentry->d_name, (void **)&label->label, &label->len); + &dentry->d_name, NULL, + (void **)&label->label, &label->len); if (err == 0) return label; diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 71881a2b6f78..5deaddbd7927 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -15,6 +15,10 @@ struct inode; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); +struct file *anon_inode_getfile_secure(const char *name, + const struct file_operations *fops, + void *priv, int flags, + const struct inode *context_inode); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); int anon_inode_getfd_secure(const char *name, diff --git a/include/linux/audit.h b/include/linux/audit.h index 82b7c1116a85..d656a06dd909 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -286,7 +286,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t) /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); +extern int audit_alloc_kernel(struct task_struct *task); extern void __audit_free(struct task_struct *task); +extern void __audit_uring_entry(u8 op); +extern void __audit_uring_exit(int success, long code); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); @@ -323,6 +326,21 @@ static inline void audit_free(struct task_struct *task) if (unlikely(task->audit_context)) __audit_free(task); } +static inline void audit_uring_entry(u8 op) +{ + /* + * We intentionally check audit_context() before audit_enabled as most + * Linux systems (as of ~2021) rely on systemd which forces audit to + * be enabled regardless of the user's audit configuration. + */ + if (unlikely(audit_context() && audit_enabled)) + __audit_uring_entry(op); +} +static inline void audit_uring_exit(int success, long code) +{ + if (unlikely(!audit_dummy_context())) + __audit_uring_exit(success, code); +} static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) @@ -554,8 +572,16 @@ static inline int audit_alloc(struct task_struct *task) { return 0; } +static inline int audit_alloc_kernel(struct task_struct *task) +{ + return 0; +} static inline void audit_free(struct task_struct *task) { } +static inline void audit_uring_entry(u8 op) +{ } +static inline void audit_uring_exit(int success, long code) +{ } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2adeea44c0d5..a9ac70ae01ab 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -26,13 +26,13 @@ * #undef LSM_HOOK * }; */ -LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) -LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, - struct task_struct *to, struct file *file) +LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) +LSM_HOOK(int, 0, binder_transaction, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, + const struct cred *to, struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) @@ -83,7 +83,8 @@ LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, - int mode, const struct qstr *name, void **ctx, u32 *ctxlen) + int mode, const struct qstr *name, const char **xattr_name, + void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) @@ -402,3 +403,8 @@ LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ + +#ifdef CONFIG_IO_URING +LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) +LSM_HOOK(int, 0, uring_sqpoll, void) +#endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 5c4c5c0602cb..0bada4df23fc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -196,6 +196,9 @@ * @dentry dentry to use in calculating the context. * @mode mode used to determine resource type. * @name name of the last path component used to create file + * @xattr_name pointer to place the pointer to security xattr name. + * Caller does not have to free the resulting pointer. Its + * a pointer to static string. * @ctx pointer to place the pointer to the resulting context in. * @ctxlen point to place the length of the resulting context. * @dentry_create_files_as: @@ -1313,22 +1316,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1557,6 +1560,19 @@ * Read perf_event security info if allowed. * @perf_event_write: * Write perf_event security info if allowed. + * + * Security hooks for io_uring + * + * @uring_override_creds: + * Check if the current task, executing an io_uring operation, is allowed + * to override it's credentials with @new. + * + * @new: the new creds to use + * + * @uring_sqpoll: + * Check whether the current task is allowed to spawn a io_uring polling + * thread (IORING_SETUP_SQPOLL). + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/security.h b/include/linux/security.h index 5b7288521300..7e0ba63b5dde 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -258,13 +258,13 @@ extern int security_init(void); extern int early_security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -317,8 +317,9 @@ int security_add_mnt_opt(const char *option, const char *val, int len, void **mnt_opts); int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, - u32 *ctxlen); + const struct qstr *name, + const char **xattr_name, void **ctx, + u32 *ctxlen); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -508,25 +509,25 @@ static inline int early_security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; @@ -739,6 +740,7 @@ static inline void security_inode_free(struct inode *inode) static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, + const char **xattr_name, void **ctx, u32 *ctxlen) { @@ -1041,6 +1043,11 @@ static inline void security_transfer_creds(struct cred *new, { } +static inline void security_cred_getsecid(const struct cred *c, u32 *secid) +{ + *secid = 0; +} + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; @@ -2038,4 +2045,20 @@ static inline int security_perf_event_write(struct perf_event *event) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_PERF_EVENTS */ +#ifdef CONFIG_IO_URING +#ifdef CONFIG_SECURITY +extern int security_uring_override_creds(const struct cred *new); +extern int security_uring_sqpoll(void); +#else +static inline int security_uring_override_creds(const struct cred *new) +{ + return 0; +} +static inline int security_uring_sqpoll(void) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_IO_URING */ + #endif /* ! __LINUX_SECURITY_H */ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index daa481729e9b..ecf1edd2affa 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -118,6 +118,7 @@ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ +#define AUDIT_URINGOP 1336 /* io_uring operation */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -166,8 +167,9 @@ #define AUDIT_FILTER_EXCLUDE 0x05 /* Apply rule before record creation */ #define AUDIT_FILTER_TYPE AUDIT_FILTER_EXCLUDE /* obsolete misleading naming */ #define AUDIT_FILTER_FS 0x06 /* Apply rule at __audit_inode_child */ +#define AUDIT_FILTER_URING_EXIT 0x07 /* Apply rule at io_uring op exit */ -#define AUDIT_NR_FILTERS 7 +#define AUDIT_NR_FILTERS 8 #define AUDIT_FILTER_PREPEND 0x10 /* Prepend to front of list */ diff --git a/kernel/audit.h b/kernel/audit.h index d6a2c899a8db..d1161e3b83e2 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -100,10 +100,15 @@ struct audit_proctitle { /* The per-task audit context. */ struct audit_context { int dummy; /* must be the first element */ - int in_syscall; /* 1 if task is in a syscall */ + enum { + AUDIT_CTX_UNUSED, /* audit_context is currently unused */ + AUDIT_CTX_SYSCALL, /* in use by syscall */ + AUDIT_CTX_URING, /* in use by io_uring */ + } context; enum audit_state state, current_state; unsigned int serial; /* serial number for record */ int major; /* syscall number */ + int uring_op; /* uring operation */ struct timespec64 ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ long return_code;/* syscall return code */ diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2cd7b5694422..338c53a961c5 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -726,7 +726,8 @@ int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) { if (pathname[0] != '/' || - rule->listnr != AUDIT_FILTER_EXIT || + (rule->listnr != AUDIT_FILTER_EXIT && + rule->listnr != AUDIT_FILTER_URING_EXIT) || op != Audit_equal || rule->inode_f || rule->watch || rule->tree) return -EINVAL; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 2acf7ca49154..698b62b4a2ec 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -183,7 +183,8 @@ int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op) return -EOPNOTSUPP; if (path[0] != '/' || path[len-1] == '/' || - krule->listnr != AUDIT_FILTER_EXIT || + (krule->listnr != AUDIT_FILTER_EXIT && + krule->listnr != AUDIT_FILTER_URING_EXIT) || op != Audit_equal || krule->inode_f || krule->watch || krule->tree) return -EINVAL; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index db2c6b59dfc3..d75acb014ccd 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -44,7 +44,8 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { LIST_HEAD_INIT(audit_filter_list[4]), LIST_HEAD_INIT(audit_filter_list[5]), LIST_HEAD_INIT(audit_filter_list[6]), -#if AUDIT_NR_FILTERS != 7 + LIST_HEAD_INIT(audit_filter_list[7]), +#if AUDIT_NR_FILTERS != 8 #error Fix audit_filter_list initialiser #endif }; @@ -56,6 +57,7 @@ static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = { LIST_HEAD_INIT(audit_rules_list[4]), LIST_HEAD_INIT(audit_rules_list[5]), LIST_HEAD_INIT(audit_rules_list[6]), + LIST_HEAD_INIT(audit_rules_list[7]), }; DEFINE_MUTEX(audit_filter_mutex); @@ -151,7 +153,8 @@ char *audit_unpack_string(void **bufp, size_t *remain, size_t len) static inline int audit_to_inode(struct audit_krule *krule, struct audit_field *f) { - if (krule->listnr != AUDIT_FILTER_EXIT || + if ((krule->listnr != AUDIT_FILTER_EXIT && + krule->listnr != AUDIT_FILTER_URING_EXIT) || krule->inode_f || krule->watch || krule->tree || (f->op != Audit_equal && f->op != Audit_not_equal)) return -EINVAL; @@ -248,6 +251,7 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule_data * pr_err("AUDIT_FILTER_ENTRY is deprecated\n"); goto exit_err; case AUDIT_FILTER_EXIT: + case AUDIT_FILTER_URING_EXIT: case AUDIT_FILTER_TASK: #endif case AUDIT_FILTER_USER: @@ -332,6 +336,10 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) if (entry->rule.listnr != AUDIT_FILTER_FS) return -EINVAL; break; + case AUDIT_PERM: + if (entry->rule.listnr == AUDIT_FILTER_URING_EXIT) + return -EINVAL; + break; } switch (entry->rule.listnr) { @@ -980,7 +988,8 @@ static inline int audit_add_rule(struct audit_entry *entry) } entry->rule.prio = ~0ULL; - if (entry->rule.listnr == AUDIT_FILTER_EXIT) { + if (entry->rule.listnr == AUDIT_FILTER_EXIT || + entry->rule.listnr == AUDIT_FILTER_URING_EXIT) { if (entry->rule.flags & AUDIT_FILTER_PREPEND) entry->rule.prio = ++prio_high; else diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b1cb1dbf7417..c131985c3e6d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -805,6 +805,34 @@ static int audit_in_mask(const struct audit_krule *rule, unsigned long val) return rule->mask[word] & bit; } +/** + * audit_filter_uring - apply filters to an io_uring operation + * @tsk: associated task + * @ctx: audit context + */ +static void audit_filter_uring(struct task_struct *tsk, + struct audit_context *ctx) +{ + struct audit_entry *e; + enum audit_state state; + + if (auditd_test_task(tsk)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_URING_EXIT], + list) { + if (audit_in_mask(&e->rule, ctx->uring_op) && + audit_filter_rules(tsk, &e->rule, ctx, NULL, &state, + false)) { + rcu_read_unlock(); + ctx->current_state = state; + return; + } + } + rcu_read_unlock(); +} + /* At syscall exit time, this filter is called if the audit_state is * not low enough that auditing cannot take place, but is also not * high enough that we already know we have to write an audit record @@ -915,10 +943,81 @@ static inline void audit_free_aux(struct audit_context *context) context->aux = aux->next; kfree(aux); } + context->aux = NULL; while ((aux = context->aux_pids)) { context->aux_pids = aux->next; kfree(aux); } + context->aux_pids = NULL; +} + +/** + * audit_reset_context - reset a audit_context structure + * @ctx: the audit_context to reset + * + * All fields in the audit_context will be reset to an initial state, all + * references held by fields will be dropped, and private memory will be + * released. When this function returns the audit_context will be suitable + * for reuse, so long as the passed context is not NULL or a dummy context. + */ +static void audit_reset_context(struct audit_context *ctx) +{ + if (!ctx) + return; + + /* if ctx is non-null, reset the "ctx->state" regardless */ + ctx->context = AUDIT_CTX_UNUSED; + if (ctx->dummy) + return; + + /* + * NOTE: It shouldn't matter in what order we release the fields, so + * release them in the order in which they appear in the struct; + * this gives us some hope of quickly making sure we are + * resetting the audit_context properly. + * + * Other things worth mentioning: + * - we don't reset "dummy" + * - we don't reset "state", we do reset "current_state" + * - we preserve "filterkey" if "state" is AUDIT_STATE_RECORD + * - much of this is likely overkill, but play it safe for now + * - we really need to work on improving the audit_context struct + */ + + ctx->current_state = ctx->state; + ctx->serial = 0; + ctx->major = 0; + ctx->uring_op = 0; + ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 }; + memset(ctx->argv, 0, sizeof(ctx->argv)); + ctx->return_code = 0; + ctx->prio = (ctx->state == AUDIT_STATE_RECORD ? ~0ULL : 0); + ctx->return_valid = AUDITSC_INVALID; + audit_free_names(ctx); + if (ctx->state != AUDIT_STATE_RECORD) { + kfree(ctx->filterkey); + ctx->filterkey = NULL; + } + audit_free_aux(ctx); + kfree(ctx->sockaddr); + ctx->sockaddr = NULL; + ctx->sockaddr_len = 0; + ctx->pid = ctx->ppid = 0; + ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0); + ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0); + ctx->personality = 0; + ctx->arch = 0; + ctx->target_pid = 0; + ctx->target_auid = ctx->target_uid = KUIDT_INIT(0); + ctx->target_sessionid = 0; + ctx->target_sid = 0; + ctx->target_comm[0] = '\0'; + unroll_tree_refs(ctx, NULL, 0); + WARN_ON(!list_empty(&ctx->killed_trees)); + ctx->type = 0; + audit_free_module(ctx); + ctx->fds[0] = -1; + audit_proctitle_free(ctx); } static inline struct audit_context *audit_alloc_context(enum audit_state state) @@ -928,6 +1027,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state) context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return NULL; + context->context = AUDIT_CTX_UNUSED; context->state = state; context->prio = state == AUDIT_STATE_RECORD ? ~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); @@ -953,7 +1053,7 @@ int audit_alloc(struct task_struct *tsk) char *key = NULL; if (likely(!audit_ever_enabled)) - return 0; /* Return if not auditing. */ + return 0; state = audit_filter_task(tsk, &key); if (state == AUDIT_STATE_DISABLED) { @@ -973,16 +1073,37 @@ int audit_alloc(struct task_struct *tsk) return 0; } +/** + * audit_alloc_kernel - allocate an audit_context for a kernel task + * @tsk: the kernel task + * + * Similar to the audit_alloc() function, but intended for kernel private + * threads. Returns zero on success, negative values on failure. + */ +int audit_alloc_kernel(struct task_struct *tsk) +{ + /* + * At the moment we are just going to call into audit_alloc() to + * simplify the code, but there two things to keep in mind with this + * approach: + * + * 1. Filtering internal kernel tasks is a bit laughable in almost all + * cases, but there is at least one case where there is a benefit: + * the '-a task,never' case allows the admin to effectively disable + * task auditing at runtime. + * + * 2. The {set,clear}_task_syscall_work() ops likely have zero effect + * on these internal kernel tasks, but they probably don't hurt either. + */ + return audit_alloc(tsk); +} + static inline void audit_free_context(struct audit_context *context) { - audit_free_module(context); - audit_free_names(context); - unroll_tree_refs(context, NULL, 0); + /* resetting is extra work, but it is likely just noise */ + audit_reset_context(context); free_tree_refs(context); - audit_free_aux(context); kfree(context->filterkey); - kfree(context->sockaddr); - audit_proctitle_free(context); kfree(context); } @@ -1479,6 +1600,44 @@ out: audit_log_end(ab); } +/** + * audit_log_uring - generate a AUDIT_URINGOP record + * @ctx: the audit context + */ +static void audit_log_uring(struct audit_context *ctx) +{ + struct audit_buffer *ab; + const struct cred *cred; + + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_URINGOP); + if (!ab) + return; + cred = current_cred(); + audit_log_format(ab, "uring_op=%d", ctx->uring_op); + if (ctx->return_valid != AUDITSC_INVALID) + audit_log_format(ab, " success=%s exit=%ld", + (ctx->return_valid == AUDITSC_SUCCESS ? + "yes" : "no"), + ctx->return_code); + audit_log_format(ab, + " items=%d" + " ppid=%d pid=%d uid=%u gid=%u euid=%u suid=%u" + " fsuid=%u egid=%u sgid=%u fsgid=%u", + ctx->name_count, + task_ppid_nr(current), task_tgid_nr(current), + from_kuid(&init_user_ns, cred->uid), + from_kgid(&init_user_ns, cred->gid), + from_kuid(&init_user_ns, cred->euid), + from_kuid(&init_user_ns, cred->suid), + from_kuid(&init_user_ns, cred->fsuid), + from_kgid(&init_user_ns, cred->egid), + from_kgid(&init_user_ns, cred->sgid), + from_kgid(&init_user_ns, cred->fsgid)); + audit_log_task_context(ab); + audit_log_key(ab, ctx->filterkey); + audit_log_end(ab); +} + static void audit_log_exit(void) { int i, call_panic = 0; @@ -1489,29 +1648,38 @@ static void audit_log_exit(void) context->personality = current->personality; - ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); - if (!ab) - return; /* audit_panic has been called */ - audit_log_format(ab, "arch=%x syscall=%d", - context->arch, context->major); - if (context->personality != PER_LINUX) - audit_log_format(ab, " per=%lx", context->personality); - if (context->return_valid != AUDITSC_INVALID) - audit_log_format(ab, " success=%s exit=%ld", - (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", - context->return_code); - - audit_log_format(ab, - " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", - context->argv[0], - context->argv[1], - context->argv[2], - context->argv[3], - context->name_count); - - audit_log_task_info(ab); - audit_log_key(ab, context->filterkey); - audit_log_end(ab); + switch (context->context) { + case AUDIT_CTX_SYSCALL: + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); + if (!ab) + return; + audit_log_format(ab, "arch=%x syscall=%d", + context->arch, context->major); + if (context->personality != PER_LINUX) + audit_log_format(ab, " per=%lx", context->personality); + if (context->return_valid != AUDITSC_INVALID) + audit_log_format(ab, " success=%s exit=%ld", + (context->return_valid == AUDITSC_SUCCESS ? + "yes" : "no"), + context->return_code); + audit_log_format(ab, + " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", + context->argv[0], + context->argv[1], + context->argv[2], + context->argv[3], + context->name_count); + audit_log_task_info(ab); + audit_log_key(ab, context->filterkey); + audit_log_end(ab); + break; + case AUDIT_CTX_URING: + audit_log_uring(context); + break; + default: + BUG(); + break; + } for (aux = context->aux; aux; aux = aux->next) { @@ -1602,21 +1770,22 @@ static void audit_log_exit(void) audit_log_name(context, n, NULL, i++, &call_panic); } - audit_log_proctitle(); + if (context->context == AUDIT_CTX_SYSCALL) + audit_log_proctitle(); /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); if (ab) audit_log_end(ab); if (call_panic) - audit_panic("error converting sid to string"); + audit_panic("error in audit_log_exit()"); } /** * __audit_free - free a per-task audit context * @tsk: task whose audit context block to free * - * Called from copy_process and do_exit + * Called from copy_process, do_exit, and the io_uring code */ void __audit_free(struct task_struct *tsk) { @@ -1625,6 +1794,7 @@ void __audit_free(struct task_struct *tsk) if (!context) return; + /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); @@ -1633,14 +1803,21 @@ void __audit_free(struct task_struct *tsk) * random task_struct that doesn't doesn't have any meaningful data we * need to log via audit_log_exit(). */ - if (tsk == current && !context->dummy && context->in_syscall) { + if (tsk == current && !context->dummy) { context->return_valid = AUDITSC_INVALID; context->return_code = 0; - - audit_filter_syscall(tsk, context); - audit_filter_inodes(tsk, context); - if (context->current_state == AUDIT_STATE_RECORD) - audit_log_exit(); + if (context->context == AUDIT_CTX_SYSCALL) { + audit_filter_syscall(tsk, context); + audit_filter_inodes(tsk, context); + if (context->current_state == AUDIT_STATE_RECORD) + audit_log_exit(); + } else if (context->context == AUDIT_CTX_URING) { + /* TODO: verify this case is real and valid */ + audit_filter_uring(tsk, context); + audit_filter_inodes(tsk, context); + if (context->current_state == AUDIT_STATE_RECORD) + audit_log_uring(context); + } } audit_set_context(tsk, NULL); @@ -1648,6 +1825,131 @@ void __audit_free(struct task_struct *tsk) } /** + * audit_return_fixup - fixup the return codes in the audit_context + * @ctx: the audit_context + * @success: true/false value to indicate if the operation succeeded or not + * @code: operation return code + * + * We need to fixup the return code in the audit logs if the actual return + * codes are later going to be fixed by the arch specific signal handlers. + */ +static void audit_return_fixup(struct audit_context *ctx, + int success, long code) +{ + /* + * This is actually a test for: + * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || + * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) + * + * but is faster than a bunch of || + */ + if (unlikely(code <= -ERESTARTSYS) && + (code >= -ERESTART_RESTARTBLOCK) && + (code != -ENOIOCTLCMD)) + ctx->return_code = -EINTR; + else + ctx->return_code = code; + ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE); +} + +/** + * __audit_uring_entry - prepare the kernel task's audit context for io_uring + * @op: the io_uring opcode + * + * This is similar to audit_syscall_entry() but is intended for use by io_uring + * operations. This function should only ever be called from + * audit_uring_entry() as we rely on the audit context checking present in that + * function. + */ +void __audit_uring_entry(u8 op) +{ + struct audit_context *ctx = audit_context(); + + if (ctx->state == AUDIT_STATE_DISABLED) + return; + + /* + * NOTE: It's possible that we can be called from the process' context + * before it returns to userspace, and before audit_syscall_exit() + * is called. In this case there is not much to do, just record + * the io_uring details and return. + */ + ctx->uring_op = op; + if (ctx->context == AUDIT_CTX_SYSCALL) + return; + + ctx->dummy = !audit_n_rules; + if (!ctx->dummy && ctx->state == AUDIT_STATE_BUILD) + ctx->prio = 0; + + ctx->context = AUDIT_CTX_URING; + ctx->current_state = ctx->state; + ktime_get_coarse_real_ts64(&ctx->ctime); +} + +/** + * __audit_uring_exit - wrap up the kernel task's audit context after io_uring + * @success: true/false value to indicate if the operation succeeded or not + * @code: operation return code + * + * This is similar to audit_syscall_exit() but is intended for use by io_uring + * operations. This function should only ever be called from + * audit_uring_exit() as we rely on the audit context checking present in that + * function. + */ +void __audit_uring_exit(int success, long code) +{ + struct audit_context *ctx = audit_context(); + + if (ctx->context == AUDIT_CTX_SYSCALL) { + /* + * NOTE: See the note in __audit_uring_entry() about the case + * where we may be called from process context before we + * return to userspace via audit_syscall_exit(). In this + * case we simply emit a URINGOP record and bail, the + * normal syscall exit handling will take care of + * everything else. + * It is also worth mentioning that when we are called, + * the current process creds may differ from the creds + * used during the normal syscall processing; keep that + * in mind if/when we move the record generation code. + */ + + /* + * We need to filter on the syscall info here to decide if we + * should emit a URINGOP record. I know it seems odd but this + * solves the problem where users have a filter to block *all* + * syscall records in the "exit" filter; we want to preserve + * the behavior here. + */ + audit_filter_syscall(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + audit_filter_uring(current, ctx); + audit_filter_inodes(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + return; + + audit_log_uring(ctx); + return; + } + + /* this may generate CONFIG_CHANGE records */ + if (!list_empty(&ctx->killed_trees)) + audit_kill_trees(ctx); + + /* run through both filters to ensure we set the filterkey properly */ + audit_filter_uring(current, ctx); + audit_filter_inodes(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + goto out; + audit_return_fixup(ctx, success, code); + audit_log_exit(); + +out: + audit_reset_context(ctx); +} + +/** * __audit_syscall_entry - fill in an audit record at syscall entry * @major: major syscall type (function) * @a1: additional syscall register 1 @@ -1672,7 +1974,12 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, if (!audit_enabled || !context) return; - BUG_ON(context->in_syscall || context->name_count); + WARN_ON(context->context != AUDIT_CTX_UNUSED); + WARN_ON(context->name_count); + if (context->context != AUDIT_CTX_UNUSED || context->name_count) { + audit_panic("unrecoverable error in audit_syscall_entry()"); + return; + } state = context->state; if (state == AUDIT_STATE_DISABLED) @@ -1691,10 +1998,8 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, context->argv[1] = a2; context->argv[2] = a3; context->argv[3] = a4; - context->serial = 0; - context->in_syscall = 1; + context->context = AUDIT_CTX_SYSCALL; context->current_state = state; - context->ppid = 0; ktime_get_coarse_real_ts64(&context->ctime); } @@ -1711,63 +2016,27 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, */ void __audit_syscall_exit(int success, long return_code) { - struct audit_context *context; + struct audit_context *context = audit_context(); - context = audit_context(); - if (!context) - return; + if (!context || context->dummy || + context->context != AUDIT_CTX_SYSCALL) + goto out; + /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); - if (!context->dummy && context->in_syscall) { - if (success) - context->return_valid = AUDITSC_SUCCESS; - else - context->return_valid = AUDITSC_FAILURE; + /* run through both filters to ensure we set the filterkey properly */ + audit_filter_syscall(current, context); + audit_filter_inodes(current, context); + if (context->current_state < AUDIT_STATE_RECORD) + goto out; - /* - * we need to fix up the return code in the audit logs if the - * actual return codes are later going to be fixed up by the - * arch specific signal handlers - * - * This is actually a test for: - * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || - * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) - * - * but is faster than a bunch of || - */ - if (unlikely(return_code <= -ERESTARTSYS) && - (return_code >= -ERESTART_RESTARTBLOCK) && - (return_code != -ENOIOCTLCMD)) - context->return_code = -EINTR; - else - context->return_code = return_code; - - audit_filter_syscall(current, context); - audit_filter_inodes(current, context); - if (context->current_state == AUDIT_STATE_RECORD) - audit_log_exit(); - } - - context->in_syscall = 0; - context->prio = context->state == AUDIT_STATE_RECORD ? ~0ULL : 0; - - audit_free_module(context); - audit_free_names(context); - unroll_tree_refs(context, NULL, 0); - audit_free_aux(context); - context->aux = NULL; - context->aux_pids = NULL; - context->target_pid = 0; - context->target_sid = 0; - context->sockaddr_len = 0; - context->type = 0; - context->fds[0] = -1; - if (context->state != AUDIT_STATE_RECORD) { - kfree(context->filterkey); - context->filterkey = NULL; - } + audit_return_fixup(context, success, return_code); + audit_log_exit(); + +out: + audit_reset_context(context); } static inline void handle_one(const struct inode *inode) @@ -1919,7 +2188,7 @@ void __audit_getname(struct filename *name) struct audit_context *context = audit_context(); struct audit_names *n; - if (!context->in_syscall) + if (context->context == AUDIT_CTX_UNUSED) return; n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); @@ -1991,7 +2260,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; - if (!context->in_syscall) + if (context->context == AUDIT_CTX_UNUSED) return; rcu_read_lock(); @@ -2109,7 +2378,7 @@ void __audit_inode_child(struct inode *parent, struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; - if (!context->in_syscall) + if (context->context == AUDIT_CTX_UNUSED) return; rcu_read_lock(); @@ -2208,7 +2477,7 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial) { - if (!ctx->in_syscall) + if (ctx->context == AUDIT_CTX_UNUSED) return 0; if (!ctx->serial) ctx->serial = audit_serial(); @@ -2706,8 +2975,7 @@ void audit_seccomp_actions_logged(const char *names, const char *old_names, struct list_head *audit_killed_trees(void) { struct audit_context *ctx = audit_context(); - - if (likely(!ctx || !ctx->in_syscall)) + if (likely(!ctx || ctx->context == AUDIT_CTX_UNUSED)) return NULL; return &ctx->killed_trees; } diff --git a/security/security.c b/security/security.c index 9ffa9e9c5c55..95e30fadba78 100644 --- a/security/security.c +++ b/security/security.c @@ -706,7 +706,7 @@ static int lsm_superblock_alloc(struct super_block *sb) #define LSM_RET_DEFAULT(NAME) (NAME##_default) #define DECLARE_LSM_RET_DEFAULT_void(DEFAULT, NAME) #define DECLARE_LSM_RET_DEFAULT_int(DEFAULT, NAME) \ - static const int LSM_RET_DEFAULT(NAME) = (DEFAULT); + static const int __maybe_unused LSM_RET_DEFAULT(NAME) = (DEFAULT); #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ DECLARE_LSM_RET_DEFAULT_##RET(DEFAULT, NAME) @@ -747,25 +747,25 @@ static int lsm_superblock_alloc(struct super_block *sb) /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr) +int security_binder_set_context_mgr(const struct cred *mgr) { return call_int_hook(binder_set_context_mgr, 0, mgr); } -int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +int security_binder_transaction(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transaction, 0, from, to); } -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transfer_binder, 0, from, to); } -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file) +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return call_int_hook(binder_transfer_file, 0, from, to, file); } @@ -1052,11 +1052,12 @@ void security_inode_free(struct inode *inode) } int security_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, - u32 *ctxlen) + const struct qstr *name, + const char **xattr_name, void **ctx, + u32 *ctxlen) { return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, - name, ctx, ctxlen); + name, xattr_name, ctx, ctxlen); } EXPORT_SYMBOL(security_dentry_init_security); @@ -2625,3 +2626,15 @@ int security_perf_event_write(struct perf_event *event) return call_int_hook(perf_event_write, 0, event); } #endif /* CONFIG_PERF_EVENTS */ + +#ifdef CONFIG_IO_URING +int security_uring_override_creds(const struct cred *new) +{ + return call_int_hook(uring_override_creds, 0, new); +} + +int security_uring_sqpoll(void) +{ + return call_int_hook(uring_sqpoll, 0); +} +#endif /* CONFIG_IO_URING */ diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 97f4c944a20f..abcd9740d10f 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -547,6 +547,7 @@ static inline struct avc_node *avc_search_node(struct selinux_avc *avc, /** * avc_lookup - Look up an AVC entry. + * @avc: the access vector cache * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -597,6 +598,7 @@ static int avc_latest_notif_update(struct selinux_avc *avc, /** * avc_insert - Insert an AVC entry. + * @avc: the access vector cache * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -825,9 +827,14 @@ out: /** * avc_update_node - Update an AVC entry + * @avc: the access vector cache * @event : Updating event * @perms : Permission mask bits - * @ssid,@tsid,@tclass : identifier of an AVC entry + * @driver: xperm driver information + * @xperm: xperm permissions + * @ssid: AVC entry source sid + * @tsid: AVC entry target sid + * @tclass : AVC entry target object class * @seqno : sequence number when decision was made * @xpd: extended_perms_decision to be added to the node * @flags: the AVC_* flags, e.g. AVC_EXTENDED_PERMS, or 0. @@ -928,6 +935,7 @@ out: /** * avc_flush - Flush the cache + * @avc: the access vector cache */ static void avc_flush(struct selinux_avc *avc) { @@ -956,6 +964,7 @@ static void avc_flush(struct selinux_avc *avc) /** * avc_ss_reset - Flush the cache and revalidate migrated permissions. + * @avc: the access vector cache * @seqno: policy sequence number */ int avc_ss_reset(struct selinux_avc *avc, u32 seqno) @@ -1105,6 +1114,7 @@ decision: /** * avc_has_perm_noaudit - Check permissions but perform no auditing. + * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -1156,6 +1166,7 @@ inline int avc_has_perm_noaudit(struct selinux_state *state, /** * avc_has_perm - Check permissions and perform any appropriate auditing. + * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e7ebd45ca345..ea7b2876a5ae 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -255,29 +255,6 @@ static inline u32 task_sid_obj(const struct task_struct *task) return sid; } -/* - * get the security ID of a task for use with binder - */ -static inline u32 task_sid_binder(const struct task_struct *task) -{ - /* - * In many case where this function is used we should be using the - * task's subjective SID, but we can't reliably access the subjective - * creds of a task other than our own so we must use the objective - * creds/SID, which are safe to access. The downside is that if a task - * is temporarily overriding it's creds it will not be reflected here; - * however, it isn't clear that binder would handle that case well - * anyway. - * - * If this ever changes and we can safely reference the subjective - * creds/SID of another task, this function will make it easier to - * identify the various places where we make use of the task SIDs in - * the binder code. It is also likely that we will need to adjust - * the main drivers/android binder code as well. - */ - return task_sid_obj(task); -} - static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry); /* @@ -760,7 +737,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "binder") || !strcmp(sb->s_type->name, "bpf") || - !strcmp(sb->s_type->name, "pstore")) + !strcmp(sb->s_type->name, "pstore") || + !strcmp(sb->s_type->name, "securityfs")) sbsec->flags |= SE_SBGENFS; if (!strcmp(sb->s_type->name, "sysfs") || @@ -2066,18 +2044,19 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ -static int selinux_binder_set_context_mgr(struct task_struct *mgr) +static int selinux_binder_set_context_mgr(const struct cred *mgr) { return avc_has_perm(&selinux_state, - current_sid(), task_sid_binder(mgr), SECCLASS_BINDER, + current_sid(), cred_sid(mgr), SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); } -static int selinux_binder_transaction(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transaction(const struct cred *from, + const struct cred *to) { u32 mysid = current_sid(); - u32 fromsid = task_sid_binder(from); + u32 fromsid = cred_sid(from); + u32 tosid = cred_sid(to); int rc; if (mysid != fromsid) { @@ -2088,24 +2067,24 @@ static int selinux_binder_transaction(struct task_struct *from, return rc; } - return avc_has_perm(&selinux_state, fromsid, task_sid_binder(to), + return avc_has_perm(&selinux_state, fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL); } -static int selinux_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return avc_has_perm(&selinux_state, - task_sid_binder(from), task_sid_binder(to), + cred_sid(from), cred_sid(to), SECCLASS_BINDER, BINDER__TRANSFER, NULL); } -static int selinux_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static int selinux_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { - u32 sid = task_sid_binder(to); + u32 sid = cred_sid(to); struct file_security_struct *fsec = selinux_file(file); struct dentry *dentry = file->f_path.dentry; struct inode_security_struct *isec; @@ -2948,7 +2927,8 @@ static void selinux_inode_free_security(struct inode *inode) } static int selinux_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, + const struct qstr *name, + const char **xattr_name, void **ctx, u32 *ctxlen) { u32 newsid; @@ -2961,6 +2941,9 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode, if (rc) return rc; + if (xattr_name) + *xattr_name = XATTR_NAME_SELINUX; + return security_sid_to_context(&selinux_state, newsid, (char **)ctx, ctxlen); } @@ -5688,40 +5671,41 @@ static int selinux_tun_dev_open(void *security) #ifdef CONFIG_NETFILTER -static unsigned int selinux_ip_forward(struct sk_buff *skb, - const struct net_device *indev, - u16 family) +static unsigned int selinux_ip_forward(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { - int err; + int ifindex; + u16 family; char *addrp; u32 peer_sid; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - u8 secmark_active; - u8 netlbl_active; - u8 peerlbl_active; + int secmark_active, peerlbl_active; if (!selinux_policycap_netpeer()) return NF_ACCEPT; secmark_active = selinux_secmark_enabled(); - netlbl_active = netlbl_enabled(); peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; + family = state->pf; if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) return NF_DROP; + ifindex = state->in->ifindex; ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; - ad.u.net->netif = indev->ifindex; + ad.u.net->netif = ifindex; ad.u.net->family = family; if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) return NF_DROP; if (peerlbl_active) { - err = selinux_inet_sys_rcv_skb(dev_net(indev), indev->ifindex, + int err; + + err = selinux_inet_sys_rcv_skb(state->net, ifindex, addrp, family, peer_sid, &ad); if (err) { selinux_netlbl_err(skb, family, err, 1); @@ -5735,7 +5719,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) return NF_DROP; - if (netlbl_active) + if (netlbl_enabled()) /* we do this in the FORWARD path and not the POST_ROUTING * path because we want to make sure we apply the necessary * labeling before IPsec is applied so we can leverage AH @@ -5746,24 +5730,8 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return selinux_ip_forward(skb, state->in, PF_INET); -} - -#if IS_ENABLED(CONFIG_IPV6) -static unsigned int selinux_ipv6_forward(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return selinux_ip_forward(skb, state->in, PF_INET6); -} -#endif /* IPV6 */ - -static unsigned int selinux_ip_output(struct sk_buff *skb, - u16 family) +static unsigned int selinux_ip_output(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { struct sock *sk; u32 sid; @@ -5798,48 +5766,32 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, sid = sksec->sid; } else sid = SECINITSID_KERNEL; - if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) + if (selinux_netlbl_skbuff_setsid(skb, state->pf, sid) != 0) return NF_DROP; return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return selinux_ip_output(skb, PF_INET); -} - -#if IS_ENABLED(CONFIG_IPV6) -static unsigned int selinux_ipv6_output(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return selinux_ip_output(skb, PF_INET6); -} -#endif /* IPV6 */ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, - int ifindex, - u16 family) + const struct nf_hook_state *state) { - struct sock *sk = skb_to_full_sk(skb); + struct sock *sk; struct sk_security_struct *sksec; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - char *addrp; u8 proto; + sk = skb_to_full_sk(skb); if (sk == NULL) return NF_ACCEPT; sksec = sk->sk_security; ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; - ad.u.net->netif = ifindex; - ad.u.net->family = family; - if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) + ad.u.net->netif = state->out->ifindex; + ad.u.net->family = state->pf; + if (selinux_parse_skb(skb, &ad, NULL, 0, &proto)) return NF_DROP; if (selinux_secmark_enabled()) @@ -5854,26 +5806,26 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ip_postroute(struct sk_buff *skb, - const struct net_device *outdev, - u16 family) +static unsigned int selinux_ip_postroute(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) { + u16 family; u32 secmark_perm; u32 peer_sid; - int ifindex = outdev->ifindex; + int ifindex; struct sock *sk; struct common_audit_data ad; struct lsm_network_audit net = {0,}; char *addrp; - u8 secmark_active; - u8 peerlbl_active; + int secmark_active, peerlbl_active; /* If any sort of compatibility mode is enabled then handoff processing * to the selinux_ip_postroute_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. */ if (!selinux_policycap_netpeer()) - return selinux_ip_postroute_compat(skb, ifindex, family); + return selinux_ip_postroute_compat(skb, state); secmark_active = selinux_secmark_enabled(); peerlbl_active = selinux_peerlbl_enabled(); @@ -5899,6 +5851,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, return NF_ACCEPT; #endif + family = state->pf; if (sk == NULL) { /* Without an associated socket the packet is either coming * from the kernel or it is being forwarded; check the packet @@ -5959,6 +5912,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, secmark_perm = PACKET__SEND; } + ifindex = state->out->ifindex; ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; ad.u.net->netif = ifindex; @@ -5976,7 +5930,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, u32 if_sid; u32 node_sid; - if (sel_netif_sid(dev_net(outdev), ifindex, &if_sid)) + if (sel_netif_sid(state->net, ifindex, &if_sid)) return NF_DROP; if (avc_has_perm(&selinux_state, peer_sid, if_sid, @@ -5993,23 +5947,6 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, return NF_ACCEPT; } - -static unsigned int selinux_ipv4_postroute(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return selinux_ip_postroute(skb, state->out, PF_INET); -} - -#if IS_ENABLED(CONFIG_IPV6) -static unsigned int selinux_ipv6_postroute(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return selinux_ip_postroute(skb, state->out, PF_INET6); -} -#endif /* IPV6 */ - #endif /* CONFIG_NETFILTER */ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) @@ -7013,34 +6950,6 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif -static int selinux_lockdown(enum lockdown_reason what) -{ - struct common_audit_data ad; - u32 sid = current_sid(); - int invalid_reason = (what <= LOCKDOWN_NONE) || - (what == LOCKDOWN_INTEGRITY_MAX) || - (what >= LOCKDOWN_CONFIDENTIALITY_MAX); - - if (WARN(invalid_reason, "Invalid lockdown reason")) { - audit_log(audit_context(), - GFP_ATOMIC, AUDIT_SELINUX_ERR, - "lockdown_reason=invalid"); - return -EINVAL; - } - - ad.type = LSM_AUDIT_DATA_LOCKDOWN; - ad.u.reason = what; - - if (what <= LOCKDOWN_INTEGRITY_MAX) - return avc_has_perm(&selinux_state, - sid, sid, SECCLASS_LOCKDOWN, - LOCKDOWN__INTEGRITY, &ad); - else - return avc_has_perm(&selinux_state, - sid, sid, SECCLASS_LOCKDOWN, - LOCKDOWN__CONFIDENTIALITY, &ad); -} - struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), @@ -7111,6 +7020,35 @@ static int selinux_perf_event_write(struct perf_event *event) } #endif +#ifdef CONFIG_IO_URING +/** + * selinux_uring_override_creds - check the requested cred override + * @new: the target creds + * + * Check to see if the current task is allowed to override it's credentials + * to service an io_uring operation. + */ +static int selinux_uring_override_creds(const struct cred *new) +{ + return avc_has_perm(&selinux_state, current_sid(), cred_sid(new), + SECCLASS_IO_URING, IO_URING__OVERRIDE_CREDS, NULL); +} + +/** + * selinux_uring_sqpoll - check if a io_uring polling thread can be created + * + * Check to see if the current task is allowed to create a new io_uring + * kernel polling thread. + */ +static int selinux_uring_sqpoll(void) +{ + int sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, sid, + SECCLASS_IO_URING, IO_URING__SQPOLL, NULL); +} +#endif /* CONFIG_IO_URING */ + /* * IMPORTANT NOTE: When adding new hooks, please be careful to keep this order: * 1. any hooks that don't belong to (2.) or (3.) below, @@ -7349,7 +7287,10 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), #endif - LSM_HOOK_INIT(locked_down, selinux_lockdown), +#ifdef CONFIG_IO_URING + LSM_HOOK_INIT(uring_override_creds, selinux_uring_override_creds), + LSM_HOOK_INIT(uring_sqpoll, selinux_uring_sqpoll), +#endif /* * PUT "CLONING" (ACCESSING + ALLOCATING) HOOKS HERE @@ -7470,38 +7411,38 @@ DEFINE_LSM(selinux) = { static const struct nf_hook_ops selinux_nf_ops[] = { { - .hook = selinux_ipv4_postroute, + .hook = selinux_ip_postroute, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_LAST, }, { - .hook = selinux_ipv4_forward, + .hook = selinux_ip_forward, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SELINUX_FIRST, }, { - .hook = selinux_ipv4_output, + .hook = selinux_ip_output, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { - .hook = selinux_ipv6_postroute, + .hook = selinux_ip_postroute, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_SELINUX_LAST, }, { - .hook = selinux_ipv6_forward, + .hook = selinux_ip_forward, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_SELINUX_FIRST, }, { - .hook = selinux_ipv6_output, + .hook = selinux_ip_output, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_SELINUX_FIRST, diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 084757ff4390..35aac62a662e 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -250,10 +250,10 @@ struct security_class_mapping secclass_map[] = { { COMMON_SOCK_PERMS, NULL } }, { "perf_event", { "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } }, - { "lockdown", - { "integrity", "confidentiality", NULL } }, { "anon_inode", { COMMON_FILE_PERMS, NULL } }, + { "io_uring", + { "override_creds", "sqpoll", NULL } }, { NULL } }; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index abaab7683840..29b88e81869b 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -29,6 +29,7 @@ /** * selinux_netlbl_sidlookup_cached - Cache a SID lookup * @skb: the packet + * @family: the packet's address family * @secattr: the NetLabel security attributes * @sid: the SID * @@ -128,6 +129,7 @@ void selinux_netlbl_cache_invalidate(void) /** * selinux_netlbl_err - Handle a NetLabel packet error * @skb: the packet + * @family: the packet's address family * @error: the error code * @gateway: true if host is acting as a gateway, false otherwise * @@ -160,7 +162,6 @@ void selinux_netlbl_sk_security_free(struct sk_security_struct *sksec) /** * selinux_netlbl_sk_security_reset - Reset the NetLabel fields * @sksec: the sk_security_struct - * @family: the socket family * * Description: * Called when the NetLabel state of a sk_security_struct needs to be reset. @@ -313,6 +314,7 @@ assoc_request_return: /** * selinux_netlbl_inet_conn_request - Label an incoming stream connection * @req: incoming connection request socket + * @family: the request socket's address family * * Description: * A new incoming connection request is represented by @req, we need to label @@ -343,6 +345,7 @@ inet_conn_request_return: /** * selinux_netlbl_inet_csk_clone - Initialize the newly created sock * @sk: the new sock + * @family: the sock's address family * * Description: * A new connection has been established using @sk, we've already labeled the @@ -378,7 +381,7 @@ void selinux_netlbl_sctp_sk_clone(struct sock *sk, struct sock *newsk) /** * selinux_netlbl_socket_post_create - Label a socket using NetLabel - * @sock: the socket to label + * @sk: the sock to label * @family: protocol family * * Description: diff --git a/security/selinux/netport.c b/security/selinux/netport.c index b8bc3897891d..9ba09d11c0f5 100644 --- a/security/selinux/netport.c +++ b/security/selinux/netport.c @@ -73,7 +73,7 @@ static unsigned int sel_netport_hashfn(u16 pnum) /** * sel_netport_find - Search for a port record * @protocol: protocol - * @port: pnum + * @pnum: port * * Description: * Search the network port table and return the matching record. If an entry diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index b8f6b3e0a921..727c3b484bd3 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/errno.h> #include "hashtab.h" +#include "security.h" static struct kmem_cache *hashtab_node_cachep __ro_after_init; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index d338962fb0c4..3f5fd124342c 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -553,6 +553,7 @@ int mls_compute_sid(struct policydb *p, #ifdef CONFIG_NETLABEL /** * mls_export_netlbl_lvl - Export the MLS sensitivity levels to NetLabel + * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * @@ -574,6 +575,7 @@ void mls_export_netlbl_lvl(struct policydb *p, /** * mls_import_netlbl_lvl - Import the NetLabel MLS sensitivity levels + * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * @@ -595,6 +597,7 @@ void mls_import_netlbl_lvl(struct policydb *p, /** * mls_export_netlbl_cat - Export the MLS categories to NetLabel + * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * @@ -622,6 +625,7 @@ int mls_export_netlbl_cat(struct policydb *p, /** * mls_import_netlbl_cat - Import the MLS categories from NetLabel + * @p: the policy * @context: the security context * @secattr: the NetLabel security attributes * diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index e5f1b2757a83..8e92af7dd284 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1102,7 +1102,7 @@ allow: * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier - * @tclass: target security class + * @orig_tclass: target security class * @avd: access vector decisions * @xperms: extended permissions * @@ -1626,6 +1626,7 @@ int security_context_str_to_sid(struct selinux_state *state, * @scontext_len: length in bytes * @sid: security identifier, SID * @def_sid: default SID to assign on error + * @gfp_flags: the allocator get-free-page (GFP) flags * * Obtains a SID associated with the security context that * has the string representation specified by @scontext. @@ -1919,6 +1920,7 @@ out: * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class + * @qstr: object name * @out_sid: security identifier for new subject/object * * Compute a SID to use for labeling a new subject or object in the @@ -1947,6 +1949,7 @@ int security_transition_sid_user(struct selinux_state *state, /** * security_member_sid - Compute the SID for member selection. + * @state: SELinux state * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class @@ -2273,6 +2276,7 @@ void selinux_policy_commit(struct selinux_state *state, * @state: SELinux state * @data: binary policy data * @len: length of data in bytes + * @load_state: policy load state * * Load a new set of security policy configuration data, * validate it and convert the SID table as necessary. @@ -2377,6 +2381,43 @@ err_policy: } /** + * ocontext_to_sid - Helper to safely get sid for an ocontext + * @sidtab: SID table + * @c: ocontext structure + * @index: index of the context entry (0 or 1) + * @out_sid: pointer to the resulting SID value + * + * For all ocontexts except OCON_ISID the SID fields are populated + * on-demand when needed. Since updating the SID value is an SMP-sensitive + * operation, this helper must be used to do that safely. + * + * WARNING: This function may return -ESTALE, indicating that the caller + * must retry the operation after re-acquiring the policy pointer! + */ +static int ocontext_to_sid(struct sidtab *sidtab, struct ocontext *c, + size_t index, u32 *out_sid) +{ + int rc; + u32 sid; + + /* Ensure the associated sidtab entry is visible to this thread. */ + sid = smp_load_acquire(&c->sid[index]); + if (!sid) { + rc = sidtab_context_to_sid(sidtab, &c->context[index], &sid); + if (rc) + return rc; + + /* + * Ensure the new sidtab entry is visible to other threads + * when they see the SID. + */ + smp_store_release(&c->sid[index], sid); + } + *out_sid = sid; + return 0; +} + +/** * security_port_sid - Obtain the SID for a port. * @state: SELinux state * @protocol: protocol number @@ -2414,17 +2455,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else { *out_sid = SECINITSID_PORT; } @@ -2473,18 +2510,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else *out_sid = SECINITSID_UNLABELED; @@ -2497,7 +2529,7 @@ out: * security_ib_endport_sid - Obtain the SID for a subnet management interface. * @state: SELinux state * @dev_name: device name - * @port: port number + * @port_num: port number * @out_sid: security identifier */ int security_ib_endport_sid(struct selinux_state *state, @@ -2533,17 +2565,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else *out_sid = SECINITSID_UNLABELED; @@ -2587,25 +2615,13 @@ retry: } if (c) { - if (!c->sid[0] || !c->sid[1]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; - rc = sidtab_context_to_sid(sidtab, &c->context[1], - &c->sid[1]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, if_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *if_sid = c->sid[0]; + if (rc) + goto out; } else *if_sid = SECINITSID_NETIF; @@ -2697,18 +2713,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else { *out_sid = SECINITSID_NODE; } @@ -2849,9 +2860,10 @@ out_unlock: /** * __security_genfs_sid - Helper to obtain a SID for a file in a filesystem + * @policy: policy * @fstype: filesystem type * @path: path from root of mount - * @sclass: file security class + * @orig_sclass: file security class * @sid: SID for path * * Obtain a SID to use for a file in a filesystem that @@ -2873,7 +2885,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, u16 sclass; struct genfs *genfs; struct ocontext *c; - int rc, cmp = 0; + int cmp = 0; while (path[0] == '/' && path[1] == '/') path++; @@ -2887,9 +2899,8 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, break; } - rc = -ENOENT; if (!genfs || cmp) - goto out; + return -ENOENT; for (c = genfs->head; c; c = c->next) { len = strlen(c->u.name); @@ -2898,20 +2909,10 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, break; } - rc = -ENOENT; if (!c) - goto out; - - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); - if (rc) - goto out; - } + return -ENOENT; - *sid = c->sid[0]; - rc = 0; -out: - return rc; + return ocontext_to_sid(sidtab, c, 0, sid); } /** @@ -2919,7 +2920,7 @@ out: * @state: SELinux state * @fstype: filesystem type * @path: path from root of mount - * @sclass: file security class + * @orig_sclass: file security class * @sid: SID for path * * Acquire policy_rwlock before calling __security_genfs_sid() and release @@ -2996,17 +2997,13 @@ retry: if (c) { sbsec->behavior = c->v.behavior; - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, &sbsec->sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - sbsec->sid = c->sid[0]; + if (rc) + goto out; } else { rc = __security_genfs_sid(policy, fstype, "/", SECCLASS_DIR, &sbsec->sid); @@ -3305,6 +3302,7 @@ out_unlock: * @nlbl_sid: NetLabel SID * @nlbl_type: NetLabel labeling protocol type * @xfrm_sid: XFRM SID + * @peer_sid: network peer sid * * Description: * Compare the @nlbl_sid and @xfrm_sid values and if the two SIDs can be diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0622e67c73e9..efd35b07c7f8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4700,6 +4700,48 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, return 0; } +#ifdef CONFIG_IO_URING +/** + * smack_uring_override_creds - Is io_uring cred override allowed? + * @new: the target creds + * + * Check to see if the current task is allowed to override it's credentials + * to service an io_uring operation. + */ +static int smack_uring_override_creds(const struct cred *new) +{ + struct task_smack *tsp = smack_cred(current_cred()); + struct task_smack *nsp = smack_cred(new); + + /* + * Allow the degenerate case where the new Smack value is + * the same as the current Smack value. + */ + if (tsp->smk_task == nsp->smk_task) + return 0; + + if (smack_privileged_cred(CAP_MAC_OVERRIDE, current_cred())) + return 0; + + return -EPERM; +} + +/** + * smack_uring_sqpoll - check if a io_uring polling thread can be created + * + * Check to see if the current task is allowed to create a new io_uring + * kernel polling thread. + */ +static int smack_uring_sqpoll(void) +{ + if (smack_privileged_cred(CAP_MAC_ADMIN, current_cred())) + return 0; + + return -EPERM; +} + +#endif /* CONFIG_IO_URING */ + struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct task_smack), .lbs_file = sizeof(struct smack_known *), @@ -4852,6 +4894,10 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(inode_copy_up, smack_inode_copy_up), LSM_HOOK_INIT(inode_copy_up_xattr, smack_inode_copy_up_xattr), LSM_HOOK_INIT(dentry_create_files_as, smack_dentry_create_files_as), +#ifdef CONFIG_IO_URING + LSM_HOOK_INIT(uring_override_creds, smack_uring_override_creds), + LSM_HOOK_INIT(uring_sqpoll, smack_uring_sqpoll), +#endif }; |