From 8d23b2080b4ffe530edc324f233c2a5bb192b152 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Tue, 7 Sep 2021 19:57:33 -0700 Subject: proc: stop using seq_get_buf in proc_task_name Use seq_escape_str and seq_printf instead of poking holes into the seq_file abstraction. Link: https://lkml.kernel.org/r/20210810151945.1795567-1-hch@lst.de Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Christian Brauner <christian.brauner@ubuntu.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/proc/array.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index ee0ce8cecc4a..49be8c8ef555 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -98,27 +98,17 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape) { - char *buf; - size_t size; char tcomm[64]; - int ret; if (p->flags & PF_WQ_WORKER) wq_worker_comm(tcomm, sizeof(tcomm), p); else __get_task_comm(tcomm, sizeof(tcomm), p); - size = seq_get_buf(m, &buf); - if (escape) { - ret = string_escape_str(tcomm, buf, size, - ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); - if (ret >= size) - ret = -1; - } else { - ret = strscpy(buf, tcomm, size); - } - - seq_commit(m, ret); + if (escape) + seq_escape_str(m, tcomm, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); + else + seq_printf(m, "%.64s", tcomm); } /* -- cgit v1.2.3 From c2f273ebd89a79ed87ef1025753343e327b99ac9 Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon <ohoono.kwon@samsung.com> Date: Tue, 7 Sep 2021 19:57:35 -0700 Subject: connector: send event on write to /proc/[pid]/comm While comm change event via prctl has been reported to proc connector by 'commit f786ecba4158 ("connector: add comm change event report to proc connector")', connector listeners were missing comm changes by explicit writes on /proc/[pid]/comm. 
Let explicit writes on /proc/[pid]/comm report to proc connector. Link: https://lkml.kernel.org/r/20210701133458epcms1p68e9eb9bd0eee8903ba26679a37d9d960@epcms1p6 Signed-off-by: Ohhoon Kwon <ohoono.kwon@samsung.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: David S. Miller <davem@davemloft.net> Cc: Christian Brauner <christian.brauner@ubuntu.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/proc/base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index e5b5f7709d48..533d5836eb9a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -95,6 +95,7 @@ #include <linux/posix-timers.h> #include <linux/time_namespace.h> #include <linux/resctrl.h> +#include <linux/cn_proc.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -1674,8 +1675,10 @@ static ssize_t comm_write(struct file *file, const char __user *buf, if (!p) return -ESRCH; - if (same_thread_group(current, p)) + if (same_thread_group(current, p)) { set_task_comm(p, buffer); + proc_comm_connector(p); + } else count = -EINVAL; -- cgit v1.2.3 From 1e1c15839df084f4011825fee922aa976c9159dc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Tue, 7 Sep 2021 20:00:00 -0700 Subject: fs/epoll: use a per-cpu counter for user's watches count This counter tracks the number of watches a user has, to compare against the 'max_user_watches' limit. This causes a scalability bottleneck on SPECjbb2015 on large systems as there is only one user. Changing to a per-cpu counter increases throughput of the benchmark by about 30% on a 16-socket, > 1000 thread system. 
[rdunlap@infradead.org: fix build errors in kernel/user.c when CONFIG_EPOLL=n] [npiggin@gmail.com: move ifdefs into wrapper functions, slightly improve panic message] Link: https://lkml.kernel.org/r/1628051945.fens3r99ox.astroid@bobo.none [akpm@linux-foundation.org: tweak user_epoll_alloc(), per Guenter] Link: https://lkml.kernel.org/r/20210804191421.GA1900577@roeck-us.net Link: https://lkml.kernel.org/r/20210802032013.2751916-1-npiggin@gmail.com Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reported-by: Anton Blanchard <anton@ozlabs.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/eventpoll.c | 18 ++++++++++-------- include/linux/sched/user.h | 3 ++- kernel/user.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1e596e1d0bba..648ed77f4164 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -723,7 +723,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) */ call_rcu(&epi->rcu, epi_rcu_free); - atomic_long_dec(&ep->user->epoll_watches); + percpu_counter_dec(&ep->user->epoll_watches); return 0; } @@ -1439,7 +1439,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, { int error, pwake = 0; __poll_t revents; - long user_watches; struct epitem *epi; struct ep_pqueue epq; struct eventpoll *tep = NULL; @@ -1449,11 +1448,15 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, lockdep_assert_irqs_enabled(); - user_watches = atomic_long_read(&ep->user->epoll_watches); - if (unlikely(user_watches >= max_user_watches)) + if (unlikely(percpu_counter_compare(&ep->user->epoll_watches, + max_user_watches) >= 0)) return -ENOSPC; - if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) + percpu_counter_inc(&ep->user->epoll_watches); + + if (!(epi = kmem_cache_zalloc(epi_cache, 
GFP_KERNEL))) { + percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; + } /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); @@ -1466,17 +1469,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, mutex_lock_nested(&tep->mtx, 1); /* Add the current item to the list of active epoll hook for this file */ if (unlikely(attach_epitem(tfile, epi) < 0)) { - kmem_cache_free(epi_cache, epi); if (tep) mutex_unlock(&tep->mtx); + kmem_cache_free(epi_cache, epi); + percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } if (full_check && !tep) list_file(tfile); - atomic_long_inc(&ep->user->epoll_watches); - /* * Add the current item to the RB tree. All RB tree operations are * protected by "mtx", and ep_insert() is called with "mtx" held. diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 2462f7d07695..00ed419dd464 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -4,6 +4,7 @@ #include <linux/uidgid.h> #include <linux/atomic.h> +#include <linux/percpu_counter.h> #include <linux/refcount.h> #include <linux/ratelimit.h> @@ -13,7 +14,7 @@ struct user_struct { refcount_t __count; /* reference count */ #ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ + struct percpu_counter epoll_watches; /* The number of file descriptors currently watched */ #endif unsigned long unix_inflight; /* How many files in flight in unix sockets */ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ diff --git a/kernel/user.c b/kernel/user.c index c82399c1618a..e2cf8c22b539 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -129,6 +129,22 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) return NULL; } +static int user_epoll_alloc(struct user_struct *up) +{ +#ifdef CONFIG_EPOLL + return percpu_counter_init(&up->epoll_watches, 0, GFP_KERNEL); +#else + return 0; +#endif +} + 
+static void user_epoll_free(struct user_struct *up) +{ +#ifdef CONFIG_EPOLL + percpu_counter_destroy(&up->epoll_watches); +#endif +} + /* IRQs are disabled and uidhash_lock is held upon function entry. * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. @@ -138,6 +154,7 @@ static void free_user(struct user_struct *up, unsigned long flags) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); + user_epoll_free(up); kmem_cache_free(uid_cachep, up); } @@ -185,6 +202,10 @@ struct user_struct *alloc_uid(kuid_t uid) new->uid = uid; refcount_set(&new->__count, 1); + if (user_epoll_alloc(new)) { + kmem_cache_free(uid_cachep, new); + return NULL; + } ratelimit_state_init(&new->ratelimit, HZ, 100); ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE); @@ -195,6 +216,7 @@ struct user_struct *alloc_uid(kuid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { + user_epoll_free(new); kmem_cache_free(uid_cachep, new); } else { uid_hash_insert(new, hashent); @@ -216,6 +238,9 @@ static int __init uid_cache_init(void) for(n = 0; n < UIDHASH_SZ; ++n) INIT_HLIST_HEAD(uidhash_table + n); + if (user_epoll_alloc(&root_user)) + panic("root_user epoll percpu counter alloc failed"); + /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID)); -- cgit v1.2.3 From 5f5dec07aca7067216ed4c1342e464e7307a9197 Mon Sep 17 00:00:00 2001 From: Nanyong Sun <sunnanyong@huawei.com> Date: Tue, 7 Sep 2021 20:00:09 -0700 Subject: nilfs2: fix memory leak in nilfs_sysfs_create_device_group Patch series "nilfs2: fix incorrect usage of kobject". This patchset from Nanyong Sun fixes memory leak issues and a NULL pointer dereference issue caused by incorrect usage of kobject in nilfs2 sysfs implementation.
This patch (of 6): Reported by syzkaller: BUG: memory leak unreferenced object 0xffff888100ca8988 (size 8): comm "syz-executor.1", pid 1930, jiffies 4294745569 (age 18.052s) hex dump (first 8 bytes): 6c 6f 6f 70 31 00 ff ff loop1... backtrace: kstrdup+0x36/0x70 mm/util.c:60 kstrdup_const+0x35/0x60 mm/util.c:83 kvasprintf_const+0xf1/0x180 lib/kasprintf.c:48 kobject_set_name_vargs+0x56/0x150 lib/kobject.c:289 kobject_add_varg lib/kobject.c:384 [inline] kobject_init_and_add+0xc9/0x150 lib/kobject.c:473 nilfs_sysfs_create_device_group+0x150/0x7d0 fs/nilfs2/sysfs.c:986 init_nilfs+0xa21/0xea0 fs/nilfs2/the_nilfs.c:637 nilfs_fill_super fs/nilfs2/super.c:1046 [inline] nilfs_mount+0x7b4/0xe80 fs/nilfs2/super.c:1316 legacy_get_tree+0x105/0x210 fs/fs_context.c:592 vfs_get_tree+0x8e/0x2d0 fs/super.c:1498 do_new_mount fs/namespace.c:2905 [inline] path_mount+0xf9b/0x1990 fs/namespace.c:3235 do_mount+0xea/0x100 fs/namespace.c:3248 __do_sys_mount fs/namespace.c:3456 [inline] __se_sys_mount fs/namespace.c:3433 [inline] __x64_sys_mount+0x14b/0x1f0 fs/namespace.c:3433 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae If kobject_init_and_add() returns an error, the kobject must still be cleaned up, because kobject_init_and_add() may have allocated memory that it does not free on failure. The cleanup_dev_kobject error path should therefore use kobject_put() to free the memory associated with the kobject. As the section "Kobject removal" of "Documentation/core-api/kobject.rst" says, kobject_del() just makes the kobject "invisible", but it is not cleaned up. No further cleanup is done after cleanup_dev_kobject, so kobject_put() is needed here.
Link: https://lkml.kernel.org/r/1625651306-10829-1-git-send-email-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/1625651306-10829-2-git-send-email-konishi.ryusuke@gmail.com Reported-by: Hulk Robot <hulkci@huawei.com> Link: https://lkml.kernel.org/r/20210629022556.3985106-2-sunnanyong@huawei.com Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/sysfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 68e8d61e28dd..d2d8ea89937a 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -986,7 +986,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL, "%s", sb->s_id); if (err) - goto free_dev_subgroups; + goto cleanup_dev_kobject; err = nilfs_sysfs_create_mounted_snapshots_group(nilfs); if (err) @@ -1023,9 +1023,7 @@ delete_mounted_snapshots_group: nilfs_sysfs_delete_mounted_snapshots_group(nilfs); cleanup_dev_kobject: - kobject_del(&nilfs->ns_dev_kobj); - -free_dev_subgroups: + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); failed_create_device_group: -- cgit v1.2.3 From dbc6e7d44a514f231a64d9d5676e001b660b6448 Mon Sep 17 00:00:00 2001 From: Nanyong Sun <sunnanyong@huawei.com> Date: Tue, 7 Sep 2021 20:00:12 -0700 Subject: nilfs2: fix NULL pointer in nilfs_##name##_attr_release In nilfs_##name##_attr_release, kobj->parent should not be referenced because it is a NULL pointer. The release() method of a kobject is always called from kobject_put(kobj), and the implementation of kobject_put() sets kobj->parent to NULL before calling the release() method. So just use kobj to get the subgroups, which is more efficient and fixes a NULL pointer dereference problem.
Link: https://lkml.kernel.org/r/20210629022556.3985106-3-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-3-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/sysfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index d2d8ea89937a..ec85ac53720d 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -51,11 +51,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \ #define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \ static void nilfs_##name##_attr_release(struct kobject *kobj) \ { \ - struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \ - struct the_nilfs *nilfs = container_of(kobj->parent, \ - struct the_nilfs, \ - ns_##parent_name##_kobj); \ - subgroups = nilfs->ns_##parent_name##_subgroups; \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \ + struct nilfs_sysfs_##parent_name##_subgroups, \ + sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ static struct kobj_type nilfs_##name##_ktype = { \ -- cgit v1.2.3 From 24f8cb1ed057c840728167dab33b32e44147c86f Mon Sep 17 00:00:00 2001 From: Nanyong Sun <sunnanyong@huawei.com> Date: Tue, 7 Sep 2021 20:00:15 -0700 Subject: nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group If kobject_init_and_add return with error, kobject_put() is needed here to avoid memory leak, because kobject_init_and_add may return error without freeing the memory associated with the kobject it allocated. 
Link: https://lkml.kernel.org/r/20210629022556.3985106-4-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-4-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index ec85ac53720d..6305e4ef7e39 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -79,8 +79,8 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \ #name); \ if (err) \ - return err; \ - return 0; \ + kobject_put(kobj); \ + return err; \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ -- cgit v1.2.3 From a3e181259ddd61fd378390977a1e4e2316853afa Mon Sep 17 00:00:00 2001 From: Nanyong Sun <sunnanyong@huawei.com> Date: Tue, 7 Sep 2021 20:00:18 -0700 Subject: nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group The kobject_put() should be used to cleanup the memory associated with the kobject instead of kobject_del. See the section "Kobject removal" of "Documentation/core-api/kobject.rst". 
Link: https://lkml.kernel.org/r/20210629022556.3985106-5-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-5-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 6305e4ef7e39..d989e6500bd7 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -84,7 +84,7 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ - kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ + kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ } /************************************************************************ -- cgit v1.2.3 From b2fe39c248f3fa4bbb2a20759b4fdd83504190f7 Mon Sep 17 00:00:00 2001 From: Nanyong Sun <sunnanyong@huawei.com> Date: Tue, 7 Sep 2021 20:00:21 -0700 Subject: nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group If kobject_init_and_add returns with error, kobject_put() is needed here to avoid memory leak, because kobject_init_and_add may return error without freeing the memory associated with the kobject it allocated. 
Link: https://lkml.kernel.org/r/20210629022556.3985106-6-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-6-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index d989e6500bd7..5ba87573ad3b 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -195,9 +195,9 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) } if (err) - return err; + kobject_put(&root->snapshot_kobj); - return 0; + return err; } void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) -- cgit v1.2.3 From 17243e1c3072b8417a5ebfc53065d0a87af7ca77 Mon Sep 17 00:00:00 2001 From: Nanyong Sun <sunnanyong@huawei.com> Date: Tue, 7 Sep 2021 20:00:23 -0700 Subject: nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group kobject_put() should be used to cleanup the memory associated with the kobject instead of kobject_del(). See the section "Kobject removal" of "Documentation/core-api/kobject.rst". 
Link: https://lkml.kernel.org/r/20210629022556.3985106-7-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-7-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 5ba87573ad3b..62f8a7ac19c8 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -202,7 +202,7 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) { - kobject_del(&root->snapshot_kobj); + kobject_put(&root->snapshot_kobj); } /************************************************************************ -- cgit v1.2.3 From 98e2e409e76ef7781d8511f997359e9c504a95c1 Mon Sep 17 00:00:00 2001 From: Zhen Lei <thunder.leizhen@huawei.com> Date: Tue, 7 Sep 2021 20:00:26 -0700 Subject: nilfs2: use refcount_dec_and_lock() to fix potential UAF When the refcount is decreased to 0, the resource reclamation branch is entered. Before CPU0 reaches the race point (1), CPU1 may obtain the spinlock and traverse the rbtree to find 'root', see nilfs_lookup_root(). Although CPU1 will call refcount_inc() to increase the refcount, it is obviously too late. CPU0 will release 'root' directly, CPU1 then accesses 'root' and triggers UAF. Use refcount_dec_and_lock() to ensure that both the operations of decrease refcount to 0 and link deletion are lock protected eliminates this risk. CPU0 CPU1 nilfs_put_root(): <-------- (1) spin_lock(&nilfs->ns_cptree_lock); rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); kfree(root); <-------- use-after-free refcount_t: underflow; use-after-free. 
WARNING: CPU: 2 PID: 9476 at lib/refcount.c:28 \ refcount_warn_saturate+0x1cf/0x210 lib/refcount.c:28 Modules linked in: CPU: 2 PID: 9476 Comm: syz-executor.0 Not tainted 5.10.45-rc1+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... RIP: 0010:refcount_warn_saturate+0x1cf/0x210 lib/refcount.c:28 ... ... Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] nilfs_put_root+0xc1/0xd0 fs/nilfs2/the_nilfs.c:795 nilfs_segctor_destroy fs/nilfs2/segment.c:2749 [inline] nilfs_detach_log_writer+0x3fa/0x570 fs/nilfs2/segment.c:2812 nilfs_put_super+0x2f/0xf0 fs/nilfs2/super.c:467 generic_shutdown_super+0xcd/0x1f0 fs/super.c:464 kill_block_super+0x4a/0x90 fs/super.c:1446 deactivate_locked_super+0x6a/0xb0 fs/super.c:335 deactivate_super+0x85/0x90 fs/super.c:366 cleanup_mnt+0x277/0x2e0 fs/namespace.c:1118 __cleanup_mnt+0x15/0x20 fs/namespace.c:1125 task_work_run+0x8e/0x110 kernel/task_work.c:151 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:164 [inline] exit_to_user_mode_prepare+0x13c/0x170 kernel/entry/common.c:191 syscall_exit_to_user_mode+0x16/0x30 kernel/entry/common.c:266 do_syscall_64+0x45/0x80 arch/x86/entry/common.c:56 entry_SYSCALL_64_after_hwframe+0x44/0xa9 There is no reproduction program, and the above is only theoretical analysis. 
Link: https://lkml.kernel.org/r/1629859428-5906-1-git-send-email-konishi.ryusuke@gmail.com Fixes: ba65ae4729bf ("nilfs2: add checkpoint tree to nilfs object") Link: https://lkml.kernel.org/r/20210723012317.4146-1-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/nilfs2/the_nilfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 8b7b01a380ce..c8bfc01da5d7 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -792,14 +792,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) void nilfs_put_root(struct nilfs_root *root) { - if (refcount_dec_and_test(&root->count)) { - struct the_nilfs *nilfs = root->nilfs; + struct the_nilfs *nilfs = root->nilfs; - nilfs_sysfs_delete_snapshot_group(root); - - spin_lock(&nilfs->ns_cptree_lock); + if (refcount_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) { rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); + + nilfs_sysfs_delete_snapshot_group(root); iput(root->ifile); kfree(root); -- cgit v1.2.3 From dbd9d6f8fa9c3e676e491ac65c7fa10a1af8321f Mon Sep 17 00:00:00 2001 From: David Oberhollenzer <david.oberhollenzer@sigma-star.at> Date: Tue, 7 Sep 2021 20:00:29 -0700 Subject: fs/coredump.c: log if a core dump is aborted due to changed file permissions For obvious security reasons, a core dump is aborted if the filesystem cannot preserve ownership or permissions of the dump file. This affects filesystems like e.g. vfat, but also something like a 9pfs share in a Qemu test setup, running as a regular user, depending on the security model used. In those cases, the result is an empty core file and a confused user. 
To hopefully save other people a lot of time figuring out the cause, this patch adds a simple log message for those specific cases. [akpm@linux-foundation.org: s/|%s/%s/ in printk text] Link: https://lkml.kernel.org/r/20210701233151.102720-1-david.oberhollenzer@sigma-star.at Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/coredump.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 07afb5ddb1c4..4b3c75732c97 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -782,10 +782,17 @@ void do_coredump(const kernel_siginfo_t *siginfo) * filesystem. */ mnt_userns = file_mnt_user_ns(cprm.file); - if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), current_fsuid())) + if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), + current_fsuid())) { + pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n", + cn.corename); goto close_fail; - if ((inode->i_mode & 0677) != 0600) + } + if ((inode->i_mode & 0677) != 0600) { + pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n", + cn.corename); goto close_fail; + } if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; if (do_truncate(mnt_userns, cprm.file->f_path.dentry, -- cgit v1.2.3 From 6fcac87e1f9e5b27805a2a404f4849194bb51de8 Mon Sep 17 00:00:00 2001 From: QiuXi <qiuxi1@huawei.com> Date: Tue, 7 Sep 2021 20:00:32 -0700 Subject: coredump: fix memleak in dump_vma_snapshot() dump_vma_snapshot() allocs memory for *vma_meta, when dump_vma_snapshot() returns -EFAULT, the memory will be leaked, so we free it correctly. 
Link: https://lkml.kernel.org/r/20210810020441.62806-1-qiuxi1@huawei.com Fixes: a07279c9a8cd7 ("binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot") Signed-off-by: QiuXi <qiuxi1@huawei.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Jann Horn <jannh@google.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/coredump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 4b3c75732c97..3224dee44d30 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1134,8 +1134,10 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, mmap_write_unlock(mm); - if (WARN_ON(i != *vma_count)) + if (WARN_ON(i != *vma_count)) { + kvfree(*vma_meta); return -EFAULT; + } *vma_data_size_ptr = vma_data_size; return 0; -- cgit v1.2.3