diff options
author | Paul Mackerras <paulus@samba.org> | 2009-03-23 18:22:08 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-06 09:30:26 +0200 |
commit | 37d81828385f8ff823caaaf1a83e72d065b6cfa1 (patch) | |
tree | 972900a193a6a5ab1bdc14adcd7ab72bf0a51c13 /kernel/perf_counter.c | |
parent | 96f6d4444302bb2ea2cf409529eef816462f6ce0 (diff) | |
download | lwn-37d81828385f8ff823caaaf1a83e72d065b6cfa1.tar.gz lwn-37d81828385f8ff823caaaf1a83e72d065b6cfa1.zip |
perf_counter: add an mmap method to allow userspace to read hardware counters
Impact: new feature giving performance improvement
This adds the ability for userspace to do an mmap on a hardware counter
fd and get access to a read-only page that contains the information
needed to translate a hardware counter value to the full 64-bit
counter value that would be returned by a read on the fd. This is
useful on architectures that allow user programs to read the hardware
counters, such as PowerPC.
The mmap will only succeed if the counter is a hardware counter
monitoring the current process.
On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter
and translate it to the full 64-bit value in about 30ns using the
mmapped page, compared to about 830ns for the read syscall on the
counter, so this does give a significant performance improvement.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.297057964@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r-- | kernel/perf_counter.c | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ce34bff07bda..d9cfd902140e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
+	free_page(counter->user_page);
 	free_counter(counter);
 	put_context(ctx);
 
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+	struct perf_counter_mmap_page *userpg;
+
+	if (!counter->user_page)
+		return;
+	userpg = (struct perf_counter_mmap_page *) counter->user_page;
+
+	++userpg->lock;
+	smp_wmb();
+	userpg->index = counter->hw.idx;
+	userpg->offset = atomic64_read(&counter->count);
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		userpg->offset -= atomic64_read(&counter->hw.prev_count);
+	smp_wmb();
+	++userpg->lock;
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	if (!counter->user_page)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = virt_to_page(counter->user_page);
+	get_page(vmf->page);
+	return 0;
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {
+	.fault = perf_mmap_fault,
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = file->private_data;
+	unsigned long userpg;
+
+	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	/*
+	 * For now, restrict to the case of a hardware counter
+	 * on the current task.
+	 */
+	if (is_software_counter(counter) || counter->task != current)
+		return -EINVAL;
+
+	userpg = counter->user_page;
+	if (!userpg) {
+		userpg = get_zeroed_page(GFP_KERNEL);
+		mutex_lock(&counter->mutex);
+		if (counter->user_page) {
+			free_page(userpg);
+			userpg = counter->user_page;
+		} else {
+			counter->user_page = userpg;
+		}
+		mutex_unlock(&counter->mutex);
+		if (!userpg)
+			return -ENOMEM;
+	}
+
+	perf_counter_update_userpage(counter);
+
+	vma->vm_flags &= ~VM_MAYWRITE;
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &perf_mmap_vmops;
+	return 0;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
 	.unlocked_ioctl		= perf_ioctl,
 	.compat_ioctl		= perf_ioctl,
+	.mmap			= perf_mmap,
 };
 
 /*