diff options
Diffstat (limited to 'kernel/events/core.c')
| -rw-r--r-- | kernel/events/core.c | 111 |
1 files changed, 94 insertions, 17 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 6d1f8bad7e1c..954c36e28101 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -58,6 +58,7 @@ #include <linux/percpu-rwsem.h> #include <linux/unwind_deferred.h> #include <linux/kvm_types.h> +#include <linux/seq_file.h> #include "internal.h" @@ -5303,6 +5304,7 @@ static void free_event_rcu(struct rcu_head *head) if (event->ns) put_pid_ns(event->ns); perf_event_free_filter(event); + kfree(event->addr_filter_ranges); kmem_cache_free(perf_event_cache, event); } @@ -5750,8 +5752,6 @@ static void __free_event(struct perf_event *event) if (event->attach_state & PERF_ATTACH_CALLCHAIN) put_callchain_buffers(); - kfree(event->addr_filter_ranges); - if (event->attach_state & PERF_ATTACH_EXCLUSIVE) exclusive_event_destroy(event); @@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) } static void perf_pmu_output_stop(struct perf_event *event); +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb); /* * A buffer can be mmap()ed multiple times; either directly through the same @@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma) mapped_f unmapped = get_mapped(event, event_unmapped); struct perf_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; - int mmap_locked = rb->mmap_locked; - unsigned long size = perf_data_size(rb); bool detach_rest = false; /* FIXIES vs perf_pmu_unregister() */ @@ -7117,11 +7116,7 @@ again: * Aside from that, this buffer is 'fully' detached and unmapped, * undo the VM accounting. */ - - atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, - &mmap_user->locked_vm); - atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); - free_uid(mmap_user); + perf_mmap_unaccount(vma, rb); out_put: ring_buffer_put(rb); /* could be last */ @@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long atomic64_add(extra, &vma->vm_mm->pinned_vm); } +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb) +{ + struct user_struct *user = rb->mmap_user; + + atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked, + &user->locked_vm); + atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm); +} + static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, unsigned long nr_pages) { @@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, if (!rb) return -ENOMEM; - refcount_set(&rb->mmap_count, 1); - rb->mmap_user = get_current_user(); rb->mmap_locked = extra; ring_buffer_attach(event, rb); @@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) mapped(event, vma->vm_mm); /* - * Try to map it into the page table. On fail, invoke - * perf_mmap_close() to undo the above, as the callsite expects - * full cleanup in this case and therefore does not invoke - * vmops::close(). + * Try to map it into the page table. On fail undo the above, + * as the callsite expects full cleanup in this case and + * therefore does not invoke vmops::close(). */ ret = map_range(event->rb, vma); - if (ret) - perf_mmap_close(vma); + if (likely(!ret)) + return 0; + + /* Error path */ + + /* + * If this is the first mmap(), then event->mmap_count should + * be stable at 1. It is only modified by: + * perf_mmap_{open,close}() and perf_mmap(). + * + * The former are not possible because this mmap() hasn't been + * successful yet, and the latter is serialized by + * event->mmap_mutex which we still hold (note that mmap_lock + * is not strictly sufficient here, because the event fd can + * be passed to another process through trivial means like + * fork(), leading to concurrent mmap() from different mm). + * + * Make sure to remove event->rb before releasing + * event->mmap_mutex, such that any concurrent mmap() will not + * attempt use this failed buffer. + */ + if (refcount_read(&event->mmap_count) == 1) { + /* + * Minimal perf_mmap_close(); there can't be AUX or + * other events on account of this being the first. + */ + mapped = get_mapped(event, event_unmapped); + if (mapped) + mapped(event, vma->vm_mm); + perf_mmap_unaccount(vma, event->rb); + ring_buffer_attach(event, NULL); /* drops last rb->refcount */ + refcount_set(&event->mmap_count, 0); + return ret; + } + + /* + * Otherwise this is an already existing buffer, and there is + * no race vs first exposure, so fall-through and call + * perf_mmap_close(). + */ } + perf_mmap_close(vma); return ret; } @@ -7506,6 +7546,33 @@ static int perf_fasync(int fd, struct file *filp, int on) return 0; } +static void perf_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct perf_event *event = f->private_data; + struct perf_event_context *ctx; + struct mutex *child_mutex; + + ctx = perf_event_ctx_lock(event); + child_mutex = event->parent ? &event->parent->child_mutex : &event->child_mutex; + mutex_lock(child_mutex); + + seq_printf(m, "perf_event_attr.type:\t%u\n", event->orig_type); + if (event->pmu) + seq_printf(m, "pmu_type:\t%u\n", event->pmu->type); + seq_printf(m, "perf_event_attr.config:\t0x%llx\n", (unsigned long long)event->attr.config); + seq_printf(m, "perf_event_attr.config1:\t0x%llx\n", + (unsigned long long)event->attr.config1); + seq_printf(m, "perf_event_attr.config2:\t0x%llx\n", + (unsigned long long)event->attr.config2); + seq_printf(m, "perf_event_attr.config3:\t0x%llx\n", + (unsigned long long)event->attr.config3); + seq_printf(m, "perf_event_attr.config4:\t0x%llx\n", + (unsigned long long)event->attr.config4); + + mutex_unlock(child_mutex); + perf_event_ctx_unlock(event, ctx); +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, @@ -7514,6 +7581,7 @@ static const struct file_operations perf_fops = { .compat_ioctl = perf_compat_ioctl, .mmap = perf_mmap, .fasync = perf_fasync, + .show_fdinfo = perf_show_fdinfo, }; /* @@ -11643,6 +11711,15 @@ static int __perf_event_set_bpf_prog(struct perf_event *event, /* only uprobe programs are allowed to be sleepable */ return -EINVAL; + if (prog->type == BPF_PROG_TYPE_TRACEPOINT && prog->sleepable) { + /* + * Sleepable tracepoint programs can only attach to faultable + * tracepoints. Currently only syscall tracepoints are faultable. + */ + if (!is_syscall_tp) + return -EINVAL; + } + /* Kprobe override only works for kprobes, not uprobes. */ if (prog->kprobe_override && !is_kprobe) return -EINVAL; |
