author     Ingo Molnar <mingo@kernel.org>  2020-11-07 12:50:48 +0100
committer  Ingo Molnar <mingo@kernel.org>  2020-11-07 13:20:17 +0100
commit     666fab4a3ea143315a9c059fad9f3a0f1365d54b
tree       e9e4be3b0eeac79346d52a86183326617d0c9999  /kernel/fork.c
parent     0a986ea81e1aa8ac17e82cda53cc95158217956e
parent     659caaf65dc9c7150aa3e80225ec6e66b25ab3ce
Merge branch 'linus' into perf/kprobes
Conflicts:
include/asm-generic/atomic-instrumented.h
kernel/kprobes.c
Use the upstream atomic-instrumented.h checksum, and pick
the kprobes version of kernel/kprobes.c, which effectively
reverts this upstream workaround:
645f224e7ba2: ("kprobes: Tell lockdep about kprobe nesting")
Since the new code *should* be fine without nesting.
Knock on wood ...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
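
For readers who want to reproduce this resolution, it maps onto plain git
commands. A minimal sketch, assuming the merge is redone from the
perf/kprobes branch (so `--ours` is perf/kprobes and `--theirs` is the
merged-in 'linus' branch):

	git merge linus     # stops with both files conflicted
	# take the upstream side of the atomic header wholesale
	git checkout --theirs include/asm-generic/atomic-instrumented.h
	# keep the perf/kprobes side of kprobes.c
	git checkout --ours kernel/kprobes.c
	git add include/asm-generic/atomic-instrumented.h kernel/kprobes.c
	git commit          # records the merge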
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c | 52 ++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 40 insertions(+), 12 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 53a1f508a097..b9c289d0f4ef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
 #include <linux/stackleak.h>
 #include <linux/kasan.h>
 #include <linux/scs.h>
+#include <linux/io_uring.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -555,10 +556,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			get_file(file);
 			if (tmp->vm_flags & VM_DENYWRITE)
-				atomic_dec(&inode->i_writecount);
+				put_write_access(inode);
 			i_mmap_lock_write(mapping);
 			if (tmp->vm_flags & VM_SHARED)
-				atomic_inc(&mapping->i_mmap_writable);
+				mapping_allow_writable(mapping);
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_interval_tree_insert_after(tmp, mpnt,
@@ -589,7 +590,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
-			retval = copy_page_range(mm, oldmm, mpnt);
+			retval = copy_page_range(tmp, mpnt);
 
 		if (tmp->vm_ops && tmp->vm_ops->open)
 			tmp->vm_ops->open(tmp);
@@ -728,6 +729,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	io_uring_free(tsk);
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
@@ -1011,6 +1013,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
+	atomic_set(&mm->has_pinned, 0);
 	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
@@ -1809,6 +1812,25 @@ static __always_inline void delayed_free_task(struct task_struct *tsk)
 		free_task(tsk);
 }
 
+static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk)
+{
+	/* Skip if kernel thread */
+	if (!tsk->mm)
+		return;
+
+	/* Skip if spawning a thread or using vfork */
+	if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM)
+		return;
+
+	/* We need to synchronize with __set_oom_adj */
+	mutex_lock(&oom_adj_mutex);
+	set_bit(MMF_MULTIPROCESS, &tsk->mm->flags);
+	/* Update the values in case they were changed after copy_signal */
+	tsk->signal->oom_score_adj = current->signal->oom_score_adj;
+	tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min;
+	mutex_unlock(&oom_adj_mutex);
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -1982,6 +2004,10 @@ static __latent_entropy struct task_struct *copy_process(
 	p->vtime.state = VTIME_INACTIVE;
 #endif
 
+#ifdef CONFIG_IO_URING
+	p->io_uring = NULL;
+#endif
+
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
@@ -2167,7 +2193,7 @@ static __latent_entropy struct task_struct *copy_process(
 	/*
 	 * Ensure that the cgroup subsystem policies allow the new process to be
-	 * forked. It should be noted the the new process's css_set can be changed
+	 * forked. It should be noted that the new process's css_set can be changed
 	 * between here and cgroup_post_fork() if an organisation operation is in
 	 * progress.
 	 */
@@ -2285,6 +2311,8 @@ static __latent_entropy struct task_struct *copy_process(
 	trace_task_newtask(p, clone_flags);
 	uprobe_copy_process(p, clone_flags);
 
+	copy_oom_score_adj(clone_flags, p);
+
 	return p;
 
 bad_fork_cancel_cgroup:
@@ -2388,14 +2416,14 @@ struct mm_struct *copy_init_mm(void)
  *
  * args->exit_signal is expected to be checked for sanity by the caller.
  */
-long _do_fork(struct kernel_clone_args *args)
+pid_t kernel_clone(struct kernel_clone_args *args)
 {
 	u64 clone_flags = args->flags;
 	struct completion vfork;
 	struct pid *pid;
 	struct task_struct *p;
 	int trace = 0;
-	long nr;
+	pid_t nr;
 
 	/*
 	 * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
@@ -2481,7 +2509,7 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 		.stack_size	= (unsigned long)arg,
 	};
 
-	return _do_fork(&args);
+	return kernel_clone(&args);
 }
 
 #ifdef __ARCH_WANT_SYS_FORK
@@ -2492,7 +2520,7 @@ SYSCALL_DEFINE0(fork)
 		.exit_signal	= SIGCHLD,
 	};
 
-	return _do_fork(&args);
+	return kernel_clone(&args);
 #else
 	/* can not support in nommu mode */
 	return -EINVAL;
@@ -2508,7 +2536,7 @@ SYSCALL_DEFINE0(vfork)
 		.exit_signal	= SIGCHLD,
 	};
 
-	return _do_fork(&args);
+	return kernel_clone(&args);
 }
 #endif
 
@@ -2546,7 +2574,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		.tls		= tls,
 	};
 
-	return _do_fork(&args);
+	return kernel_clone(&args);
 }
 #endif
 
@@ -2704,7 +2732,7 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
 	if (!clone3_args_valid(&kargs))
 		return -EINVAL;
 
-	return _do_fork(&kargs);
+	return kernel_clone(&kargs);
}
 #endif
 
@@ -2867,7 +2895,7 @@ int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
 /*
  * unshare allows a process to 'unshare' part of the process
  * context which was originally shared using clone. copy_*
- * functions used by _do_fork() cannot be used here directly
+ * functions used by kernel_clone() cannot be used here directly
  * because they modify an inactive task_struct that is being
  * constructed. Here we are modifying the current, active,
  * task_struct.
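
A note on the two dup_mmap() hunks above: they swap open-coded atomics for
the equivalent inline helpers, so they change no behavior. For orientation,
a sketch of those helpers as they read in <linux/fs.h> around this point in
the tree (quoted from memory; treat the exact bodies as an assumption, not
as part of this patch):

	/* sketch of the <linux/fs.h> helpers the dup_mmap() hunks switch to */
	static inline void put_write_access(struct inode *inode)
	{
		/* the same atomic_dec() dup_mmap() previously open-coded */
		atomic_dec(&inode->i_writecount);
	}

	static inline void mapping_allow_writable(struct address_space *mapping)
	{
		/* the same atomic_inc() dup_mmap() previously open-coded */
		atomic_inc(&mapping->i_mmap_writable);
	}

The rest of the churn is mechanical: copy_page_range() now takes the new and
old VMAs directly instead of the two mm_structs, and every _do_fork() call
site is renamed to kernel_clone().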