diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-03-23 07:59:38 +0100 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-03-23 07:59:38 +0100 |
commit | baca54d956f77be9abc487bcdddf7a2a1fbbda1b (patch) | |
tree | 4dbd646d7f533c2c741ecb428563a84f5af8ceec /kernel | |
parent | 217fb361167d4e108b9076e826a6d9624252696a (diff) | |
parent | 16fbf79b0f83bc752cee8589279f1ebfe57b3b6e (diff) | |
download | lwn-baca54d956f77be9abc487bcdddf7a2a1fbbda1b.tar.gz lwn-baca54d956f77be9abc487bcdddf7a2a1fbbda1b.zip |
Merge 5.6-rc7 into char-misc-next
We need the char/misc driver fixes in here as well.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 3 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 43 | ||||
-rw-r--r-- | kernel/futex.c | 93 | ||||
-rw-r--r-- | kernel/notifier.c | 2 | ||||
-rw-r--r-- | kernel/pid.c | 10 | ||||
-rw-r--r-- | kernel/sys.c | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 14 |
8 files changed, 110 insertions, 59 deletions
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index be1a1c83cdd1..f2d7cea86ffe 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) */ p++; if (p >= end) { + (*pos)++; return NULL; } else { *pos = *p; @@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work) pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!pathbuf || !agentbuf) + if (!pathbuf || !agentbuf || !strlen(agentbuf)) goto out; spin_lock_irq(&css_set_lock); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 75f687301bbf..3dead0416b91 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v) static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_CPU); } @@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4463,10 +4467,14 @@ repeat: else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4485,11 +4493,12 @@ repeat: goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } @@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } @@ -6258,6 +6271,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { diff --git a/kernel/futex.c b/kernel/futex.c index 0cf84c8664f2..82dfacb3250e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) */ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, - (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); + return &futex_queues[hash & (futex_hashsize - 1)]; } @@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - ihold(key->shared.inode); /* implies smp_mb(); (B) */ + smp_mb(); /* explicit smp_mb(); (B) */ break; case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies smp_mb(); (B) */ @@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - iput(key->shared.inode); break; case FUT_OFF_MMSHARED: mmdrop(key->private.mm); @@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, return timeout; } +/* + * Generate a machine wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the life-time of the machine; which with + * 1ns resolution means almost 585 years. + * + * This further relies on the fact that a well formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false-positive, esp. + * for PI futexes that can mess up the state. The above argues that false-negatives + * are only possible for malformed programs. + */ +static u64 get_inode_sequence_number(struct inode *inode) +{ + static atomic64_t i_seq; + u64 old; + + /* Does the inode already have a sequence number? */ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; + + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; + } +} + /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex @@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, * * The key words are stored in @key on success. * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page). For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + * ( inode->i_sequence, page->index, offset_within_page ) + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + * ( current->mm, address, 0 ) + * + * This allows (cross process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ @@ -659,8 +704,6 @@ again: key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ - } else { struct inode *inode; @@ -692,40 +735,14 @@ again: goto again; } - /* - * Take a reference unless it is about to be freed. Previously - * this reference was taken by ihold under the page lock - * pinning the inode in place so i_lock was unnecessary. The - * only way for this check to fail is if the inode was - * truncated in parallel which is almost certainly an - * application bug. In such a case, just retry. - * - * We are not calling into get_futex_key_refs() in file-backed - * cases, therefore a successful atomic_inc return below will - * guarantee that get_futex_key() will still imply smp_mb(); (B). - */ - if (!atomic_inc_not_zero(&inode->i_count)) { - rcu_read_unlock(); - put_page(page); - - goto again; - } - - /* Should be impossible but lets be paranoid for now */ - if (WARN_ON_ONCE(inode->i_mapping != mapping)) { - err = -EFAULT; - rcu_read_unlock(); - iput(inode); - - goto out; - } - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = inode; + key->shared.i_seq = get_inode_sequence_number(inode); key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + out: put_page(page); return err; diff --git a/kernel/notifier.c b/kernel/notifier.c index 63d7501ac638..5989bbb93039 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { - vmalloc_sync_all(); + vmalloc_sync_mappings(); return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); diff --git a/kernel/pid.c b/kernel/pid.c index 0f4ecb57214c..647b4bb457b5 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, tmp = tmp->parent; } + /* + * ENOMEM is not the most obvious choice especially for the case + * where the child subreaper has already exited and the pid + * namespace denies the creation of any new processes. But ENOMEM + * is what we have exposed to userspace for a long time and it is + * documented behavior for pid namespaces. So we can't easily + * change it even if there were an error code better suited. + */ + retval = -ENOMEM; + if (unlikely(is_child_reaper(pid))) { if (pid_ns_prepare_proc(ns)) goto out_free; diff --git a/kernel/sys.c b/kernel/sys.c index f9bc5c303e3f..d325f3ab624a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -47,6 +47,7 @@ #include <linux/syscalls.h> #include <linux/kprobes.h> #include <linux/user_namespace.h> +#include <linux/time_namespace.h> #include <linux/binfmts.h> #include <linux/sched.h> @@ -2546,6 +2547,7 @@ static int do_sysinfo(struct sysinfo *info) memset(info, 0, sizeof(struct sysinfo)); ktime_get_boottime_ts64(&tp); + timens_add_boottime(&tp); info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f7ee102868a..fd81c7de77a7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) rec = bsearch(&key, pg->records, pg->index, sizeof(struct dyn_ftrace), ftrace_cmp_recs); + if (rec) + break; } return rec; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 301db4406bc3..4e01c448b4b4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, return; rcu_read_lock(); retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be |