From 90ec5e89e393c76e19afc845d8f88a5dc8315919 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Feb 2017 19:23:37 +0530 Subject: kretprobes: Ensure probe location is at function entry kretprobes can be registered by specifying an absolute address or by specifying offset to a symbol. However, we need to ensure this falls at function entry so as to be able to determine the return address. Validate the same during kretprobe registration. By default, there should not be any offset from a function entry, as determined through a kallsyms_lookup(). Introduce arch_function_offset_within_entry() as a way for architectures to override this. Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/f1583bc4839a3862cfc2acefcc56f9c8837fa2ba.1487770934.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c328e4f7dcad..177bdf6c6aeb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -267,6 +267,7 @@ extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); +extern bool arch_function_offset_within_entry(unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); -- cgit v1.2.3 From e422267322cd319e2695a535e47c5b1feeac45eb Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:11:36 +0530 Subject: perf: Add PERF_RECORD_NAMESPACES to include namespaces related info With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. 
This patch introduces new PERF_RECORD_NAMESPACES event for recording namespaces related info. By recording info for every namespace, it is left to userspace to take a call on the definition of a container and trace containers by updating perf tool accordingly. Each namespace has a combination of device and inode numbers. Though every namespace has the same device number currently, that may change in future to avoid the need for a namespace of namespaces. Considering such possibility, record both device and inode numbers separately for each namespace. Signed-off-by: Hari Bathini Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 + include/uapi/linux/perf_event.h | 32 ++++++++- kernel/events/core.c | 139 ++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 2 + kernel/nsproxy.c | 3 + 5 files changed, 177 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000fdb211c7d..f19a82362851 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool 
exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485a24ac..bec0aad0e15c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e32..16c877a121c8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t 
nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec) perf_event_comm_event(&comm_event); } +/* + * namespaces tracking + */ + +struct perf_namespaces_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[NR_NAMESPACES]; + } event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + perf_event_header__init_id(&namespaces_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + namespaces_event->event_id.header.size); + if (ret) + return; + + namespaces_event->event_id.pid = perf_event_pid(event, + namespaces_event->task); + namespaces_event->event_id.tid = perf_event_tid(event, + namespaces_event->task); + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, + struct task_struct *task, + 
const struct proc_ns_operations *ns_ops) +{ + struct path ns_path; + struct inode *ns_inode; + void *error; + + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) { + ns_inode = ns_path.dentry->d_inode; + ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); + ns_link_info->ino = ns_inode->i_ino; + } +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + struct perf_ns_link_info *ns_link_info; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = (struct perf_namespaces_event){ + .task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + .nr_namespaces = NR_NAMESPACES, + /* .link_info[NR_NAMESPACES] */ + }, + }; + + ns_link_info = namespaces_event.event_id.link_info; + + perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX], + task, &mntns_operations); + +#ifdef CONFIG_USER_NS + perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX], + task, &userns_operations); +#endif +#ifdef CONFIG_NET_NS + perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX], + task, &netns_operations); +#endif +#ifdef CONFIG_UTS_NS + perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX], + task, &utsns_operations); +#endif +#ifdef CONFIG_IPC_NS + perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX], + task, &ipcns_operations); +#endif +#ifdef CONFIG_PID_NS + perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX], + task, &pidns_operations); +#endif +#ifdef CONFIG_CGROUPS + perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX], + task, &cgroupns_operations); +#endif + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + /* * mmap tracking */ @@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) 
atomic_inc(&nr_task_events); if (event->attr.freq) @@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EACCES; } + if (attr.namespaces) { + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } + if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..afa2947286cd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } } + perf_event_namespaces(current); + bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e59eed..f6c5d330059a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) goto out; } switch_task_namespaces(tsk, new_nsproxy); + + perf_event_namespaces(tsk); out: fput(file); return err; -- cgit v1.2.3 From 1d585e70905e03e8c19c9aaf523ec246ae6b18a1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 8 Mar 2017 13:56:06 +0530 Subject: trace/kprobes: Fix check for kretprobe offset within function entry perf specifies an offset from _text and since this offset is fed directly into the arch-specific helper, kprobes tracer rejects installation of kretprobes through perf. Fix this by looking up the actual offset from a function for the specified sym+offset. Refactor and reuse existing routines to limit code duplication -- we repurpose kprobe_addr() for determining final kprobe address and we split out the function entry offset determination into a separate generic helper. 
Before patch: naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. Matched function: do_open [2d0c7ff] Probe point found: do_open+0 Matched function: do_open [35d76dc] found inline addr: 0xc0000000004ba9c4 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469776 Failed to write event: Invalid argument Error: Failed to add events. Reason: Invalid argument (Code: -22) naveen@ubuntu:~/linux/tools/perf$ dmesg | tail [ 33.568656] Given offset is not valid for return probe. After patch: naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. Matched function: do_open [2d0c7d6] Probe point found: do_open+0 Matched function: do_open [35d76b3] found inline addr: 0xc0000000004ba9e4 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. 
Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469808 Writing event: r:probe/do_open_1 _text+4956344 Added new events: probe:do_open (on do_open%return) probe:do_open_1 (on do_open%return) You can now use it in all perf tools, such as: perf record -e probe:do_open_1 -aR sleep 1 naveen@ubuntu:~/linux/tools/perf$ sudo cat /sys/kernel/debug/kprobes/list c000000000041370 k kretprobe_trampoline+0x0 [OPTIMIZED] c0000000004ba0b8 r do_open+0x8 [DISABLED] c000000000443430 r do_open+0x0 [DISABLED] Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/d8cd1ef420ec22e3643ac332fdabcffc77319a42.1488961018.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/kprobes.h | 1 + kernel/kprobes.c | 40 ++++++++++++++++++++++++++-------------- kernel/trace/trace_kprobe.c | 2 +- 3 files changed, 28 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 177bdf6c6aeb..47e4da5b4fa2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -268,6 +268,7 @@ extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern bool arch_function_offset_within_entry(unsigned long offset); +extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 4780ec236035..d733479a10ee 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr) * This returns encoded errors if it fails to look up symbol or invalid * combination 
of parameters. */ -static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, + const char *symbol_name, unsigned int offset) { - kprobe_opcode_t *addr = p->addr; - - if ((p->symbol_name && p->addr) || - (!p->symbol_name && !p->addr)) + if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; - if (p->symbol_name) { - kprobe_lookup_name(p->symbol_name, addr); + if (symbol_name) { + kprobe_lookup_name(symbol_name, addr); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); + addr = (kprobe_opcode_t *)(((char *)addr) + offset); if (addr) return addr; @@ -1413,6 +1411,11 @@ invalid: return ERR_PTR(-EINVAL); } +static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +{ + return _kprobe_addr(p->addr, p->symbol_name, p->offset); +} + /* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { @@ -1881,19 +1884,28 @@ bool __weak arch_function_offset_within_entry(unsigned long offset) return !offset; } +bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +{ + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + + if (IS_ERR(kp_addr)) + return false; + + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || + !arch_function_offset_within_entry(offset)) + return false; + + return true; +} + int register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; int i; void *addr; - unsigned long offset; - - addr = kprobe_addr(&rp->kp); - if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) - return -EINVAL; - if (!arch_function_offset_within_entry(offset)) + if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) return -EINVAL; if (kretprobe_blacklist_size) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 12fb540da0e5..013f4e7146d4 
100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -697,7 +697,7 @@ static int create_trace_kprobe(int argc, char **argv) return ret; } if (offset && is_return && - !arch_function_offset_within_entry(offset)) { + !function_offset_within_entry(NULL, symbol, offset)) { pr_info("Given offset is not valid for return probe.\n"); return -EINVAL; } -- cgit v1.2.3 From f4c0b0aa58d9b7e30ab0a95e33da84d53b3d764a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 20 Feb 2017 15:33:50 +0200 Subject: perf/core: Keep AUX flags in the output handle In preparation for adding more flags to perf AUX records, introduce a separate API for setting the flags for a session, rather than appending more bool arguments to perf_aux_output_end. This allows to set each flag at the time a corresponding condition is detected, instead of tracking it in each driver's private state. Signed-off-by: Will Deacon Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 16 +++++------ arch/x86/events/intel/pt.c | 17 ++++++------ arch/x86/events/intel/pt.h | 1 - drivers/hwtracing/coresight/coresight-etb10.c | 9 +++---- drivers/hwtracing/coresight/coresight-etm-perf.c | 9 +++---- drivers/hwtracing/coresight/coresight-priv.h | 2 -- drivers/hwtracing/coresight/coresight-tmc-etf.c | 7 +++-- include/linux/coresight.h | 2 +- include/linux/perf_event.h | 8 +++--- kernel/events/ring_buffer.c | 34 ++++++++++++++++-------- 10 files changed, 56 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 982c9e31daca..8ae8c5ce3a1f 100644 --- 
a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,7 +63,6 @@ struct bts_buffer { unsigned int cur_buf; bool snapshot; local_t data_size; - local_t lost; local_t head; unsigned long end; void **data_pages; @@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts) return; if (ds->bts_index >= ds->bts_absolute_maximum) - local_inc(&buf->lost); + perf_aux_output_flag(&bts->handle, + PERF_AUX_FLAG_TRUNCATED); /* * old and head are always in the same physical buffer, so we @@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags) return; fail_end_stop: - perf_aux_output_end(&bts->handle, 0, false); + perf_aux_output_end(&bts->handle, 0); fail_stop: event->hw.state = PERF_HES_STOPPED; @@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); - - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), - !!local_xchg(&buf->lost, 0)); + perf_aux_output_end(&bts->handle, + local_xchg(&buf->data_size, 0)); } cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; @@ -484,8 +483,7 @@ int intel_bts_interrupt(void) if (old_head == local_read(&buf->head)) return handled; - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), - !!local_xchg(&buf->lost, 0)); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); buf = perf_aux_output_begin(&bts->handle, event); if (buf) @@ -500,7 +498,7 @@ int intel_bts_interrupt(void) * cleared handle::event */ barrier(); - perf_aux_output_end(&bts->handle, 0, false); + perf_aux_output_end(&bts->handle, 0); } } diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 5900471ee508..0218728be37a 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -753,7 +753,8 @@ static void pt_handle_status(struct pt *pt) */ if (!pt_cap_get(PT_CAP_topa_multiple_entries) || buf->output_off == sizes(TOPA_ENTRY(buf->cur, 
buf->cur_idx)->size)) { - local_inc(&buf->lost); + perf_aux_output_flag(&pt->handle, + PERF_AUX_FLAG_TRUNCATED); advance++; } } @@ -846,8 +847,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, /* can't stop in the middle of an output region */ if (buf->output_off + handle->size + 1 < - sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) + sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); return -EINVAL; + } /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */ @@ -1192,8 +1195,7 @@ void intel_pt_interrupt(void) pt_update_head(pt); - perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), - local_xchg(&buf->lost, 0)); + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); if (!event->hw.state) { int ret; @@ -1208,7 +1210,7 @@ void intel_pt_interrupt(void) /* snapshot counters don't use PMI, so it's safe */ ret = pt_buffer_reset_markers(buf, &pt->handle); if (ret) { - perf_aux_output_end(&pt->handle, 0, true); + perf_aux_output_end(&pt->handle, 0); return; } @@ -1280,7 +1282,7 @@ static void pt_event_start(struct perf_event *event, int mode) return; fail_end_stop: - perf_aux_output_end(&pt->handle, 0, true); + perf_aux_output_end(&pt->handle, 0); fail_stop: hwc->state = PERF_HES_STOPPED; } @@ -1321,8 +1323,7 @@ static void pt_event_stop(struct perf_event *event, int mode) pt->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); - perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), - local_xchg(&buf->lost, 0)); + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0)); } } diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 53473c21b554..b528e8f373e4 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -143,7 +143,6 @@ struct pt_buffer { size_t output_off; unsigned long nr_pages; local_t data_size; - local_t lost; local64_t head; bool snapshot; unsigned long stop_pos, 
intr_pos; diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d7325c6534ad..979ea6ec7902 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev, static unsigned long etb_reset_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost) + void *sink_config) { unsigned long size = 0; struct cs_buffers *buf = sink_config; @@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev, * resetting parameters here and squaring off with the ring * buffer API in the tracer PMU is fine. */ - *lost = !!local_xchg(&buf->lost, 0); size = local_xchg(&buf->data_size, 0); } @@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev, (unsigned long)write_ptr); write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1); - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } /* @@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + ETB_STATUS_REG); if (status & ETB_STATUS_RAM_FULL) { - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); to_read = capacity; read_ptr = write_ptr; } else { @@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev, if (read_ptr > (drvdata->buffer_depth - 1)) read_ptr -= drvdata->buffer_depth; /* let the decoder know we've skipped ahead */ - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } /* finally tell HW where we want to start reading from */ diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 26cfac3e6de7..288a423c1b27 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ 
-302,7 +302,8 @@ out: return; fail_end_stop: - perf_aux_output_end(handle, 0, true); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + perf_aux_output_end(handle, 0); fail: event->hw.state = PERF_HES_STOPPED; goto out; @@ -310,7 +311,6 @@ fail: static void etm_event_stop(struct perf_event *event, int mode) { - bool lost; int cpu = smp_processor_id(); unsigned long size; struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); @@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode) return; size = sink_ops(sink)->reset_buffer(sink, handle, - event_data->snk_config, - &lost); + event_data->snk_config); - perf_aux_output_end(handle, size, lost); + perf_aux_output_end(handle, size); } /* Disabling the path make its elements available to other sessions */ diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index ef9d8e93e3b2..5f662d82052c 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -76,7 +76,6 @@ enum cs_mode { * @nr_pages: max number of pages granted to us * @offset: offset within the current buffer * @data_size: how much we collected in this run - * @lost: other than zero if we had a HW buffer wrap around * @snapshot: is this run in snapshot mode * @data_pages: a handle the ring buffer */ @@ -85,7 +84,6 @@ struct cs_buffers { unsigned int nr_pages; unsigned long offset; local_t data_size; - local_t lost; bool snapshot; void **data_pages; }; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 1549436e2492..aec61a6d5c63 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, 
bool *lost) + void *sink_config) { long size = 0; struct cs_buffers *buf = sink_config; @@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, * resetting parameters here and squaring off with the ring * buffer API in the tracer PMU is fine. */ - *lost = !!local_xchg(&buf->lost, 0); size = local_xchg(&buf->data_size, 0); } @@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + TMC_STS); if (status & TMC_STS_FULL) { - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); to_read = drvdata->size; } else { to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); @@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, read_ptr -= drvdata->size; /* Tell the HW */ writel_relaxed(read_ptr, drvdata->base + TMC_RRP); - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } cur = buf->cur; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 2a5982c37dfb..035c16c9a505 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -201,7 +201,7 @@ struct coresight_ops_sink { void *sink_config); unsigned long (*reset_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost); + void *sink_config); void (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f19a82362851..b6e75c9d4791 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -801,6 +801,7 @@ struct perf_output_handle { struct ring_buffer *rb; unsigned long wakeup; unsigned long size; + u64 aux_flags; union { void *addr; unsigned long head; @@ -849,10 +850,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) extern void *perf_aux_output_begin(struct perf_output_handle 
*handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, - unsigned long size, bool truncated); + unsigned long size); extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void *perf_get_aux(struct perf_output_handle *handle); +extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); @@ -1268,8 +1270,8 @@ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } static inline void -perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) { } +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) + { } static inline int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { return -EINVAL; } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 257fa460b846..9654e55c38d6 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->paused = 1; } +void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) +{ + /* + * OVERWRITE is determined by perf_aux_output_end() and can't + * be passed in directly. + */ + if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE)) + return; + + handle->aux_flags |= flags; +} +EXPORT_SYMBOL_GPL(perf_aux_output_flag); + /* * This is called before hardware starts writing to the AUX area to * obtain an output handle and make sure there's room in the buffer. 
@@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, handle->event = event; handle->head = aux_head; handle->size = 0; + handle->aux_flags = 0; /* * In overwrite mode, AUX data stores do not depend on aux_tail, @@ -408,34 +422,32 @@ err: * of the AUX buffer management code is that after pmu::stop(), the AUX * transaction must be stopped and therefore drop the AUX reference count. */ -void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) +void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { struct ring_buffer *rb = handle->rb; - bool wakeup = truncated; + bool wakeup = !!handle->aux_flags; unsigned long aux_head; - u64 flags = 0; - - if (truncated) - flags |= PERF_AUX_FLAG_TRUNCATED; /* in overwrite mode, driver provides aux_head via handle */ if (rb->aux_overwrite) { - flags |= PERF_AUX_FLAG_OVERWRITE; + handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; local_set(&rb->aux_head, aux_head); } else { + handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; + aux_head = local_read(&rb->aux_head); local_add(size, &rb->aux_head); } - if (size || flags) { + if (size || handle->aux_flags) { /* * Only send RECORD_AUX if we have something useful to communicate */ - perf_event_aux_event(handle->event, aux_head, size, flags); + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); } aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); @@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, } if (wakeup) { - if (truncated) + if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) handle->event->pending_disable = 1; perf_output_wakeup(handle); } -- cgit v1.2.3 From cf25f904ef75aa7c25097eb4981bbc634bf5ff9e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:21 -0600 Subject: x86/events/amd/iommu: Add IOMMU-specific hw_perf_event struct MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current AMD IOMMU perf PMU inappropriately uses the hardware struct inside the union in struct hw_perf_event, extra_reg in particular. Instead, introduce an AMD IOMMU-specific struct with required parameters to be programmed into the IOMMU performance counter control register. Update the pasid field from 16 to 20 bits while at it. Signed-off-by: Suravee Suthikulpanit [ Fixup macros, shorten get_next_avail_iommu_bnk_cntr() local vars, massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-10-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/iommu.c | 113 ++++++++++++++++++++------------------------ include/linux/perf_event.h | 7 +++ 2 files changed, 57 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 7ac8138023cc..f0d94c8b382a 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -23,17 +23,16 @@ #define COUNTER_SHIFT 16 -#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8)) -#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg)) - -/* iommu pmu config masks */ -#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL)) -#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL) -#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL) -#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL) -#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL) -#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL) -#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL) +/* iommu pmu conf masks */ +#define GET_CSOURCE(x) 
((x)->conf & 0xFFULL) +#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) +#define GET_DOMID(x) (((x)->conf >> 24) & 0xFFFFULL) +#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL) + +/* iommu pmu conf1 masks */ +#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL) +#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) +#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) static struct perf_amd_iommu __perf_iommu; @@ -50,11 +49,11 @@ struct perf_amd_iommu { *---------------------------------------------*/ PMU_FORMAT_ATTR(csource, "config:0-7"); PMU_FORMAT_ATTR(devid, "config:8-23"); -PMU_FORMAT_ATTR(pasid, "config:24-39"); -PMU_FORMAT_ATTR(domid, "config:40-55"); +PMU_FORMAT_ATTR(domid, "config:24-39"); +PMU_FORMAT_ATTR(pasid, "config:40-59"); PMU_FORMAT_ATTR(devid_mask, "config1:0-15"); -PMU_FORMAT_ATTR(pasid_mask, "config1:16-31"); -PMU_FORMAT_ATTR(domid_mask, "config1:32-47"); +PMU_FORMAT_ATTR(domid_mask, "config1:16-31"); +PMU_FORMAT_ATTR(pasid_mask, "config1:32-51"); static struct attribute *iommu_format_attrs[] = { &format_attr_csource.attr, @@ -150,30 +149,34 @@ static struct attribute_group amd_iommu_cpumask_group = { /*---------------------------------------------*/ -static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu) +static int get_next_avail_iommu_bnk_cntr(struct perf_event *event) { + struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu); + int max_cntrs = piommu->max_counters; + int max_banks = piommu->max_banks; + u32 shift, bank, cntr; unsigned long flags; - int shift, bank, cntr, retval; - int max_banks = perf_iommu->max_banks; - int max_cntrs = perf_iommu->max_counters; + int retval; - raw_spin_lock_irqsave(&perf_iommu->lock, flags); + raw_spin_lock_irqsave(&piommu->lock, flags); for (bank = 0, shift = 0; bank < max_banks; bank++) { for (cntr = 0; cntr < max_cntrs; cntr++) { shift = bank + (bank*3) + cntr; - if (perf_iommu->cntr_assign_mask & BIT_ULL(shift)) { + if 
(piommu->cntr_assign_mask & BIT_ULL(shift)) { continue; } else { - perf_iommu->cntr_assign_mask |= BIT_ULL(shift); - retval = ((bank & 0xFF) << 8) | (cntr & 0xFF); + piommu->cntr_assign_mask |= BIT_ULL(shift); + event->hw.iommu_bank = bank; + event->hw.iommu_cntr = cntr; + retval = 0; goto out; } } } retval = -ENOSPC; out: - raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); + raw_spin_unlock_irqrestore(&piommu->lock, flags); return retval; } @@ -202,8 +205,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, static int perf_iommu_event_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct perf_amd_iommu *perf_iommu; - u64 config, config1; /* test the event attr type check for PMU enumeration */ if (event->attr.type != event->pmu->type) @@ -225,21 +226,9 @@ static int perf_iommu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - perf_iommu = &__perf_iommu; - - if (event->pmu != &perf_iommu->pmu) - return -ENOENT; - - if (perf_iommu) { - config = event->attr.config; - config1 = event->attr.config1; - } else { - return -EINVAL; - } - /* update the hw_perf_event struct with the iommu config data */ - hwc->config = config; - hwc->extra_reg.config = config1; + hwc->conf = event->attr.config; + hwc->conf1 = event->attr.config1; return 0; } @@ -247,26 +236,28 @@ static int perf_iommu_event_init(struct perf_event *event) static void perf_iommu_enable_event(struct perf_event *ev) { struct amd_iommu *iommu = get_amd_iommu(0); - u8 csource = _GET_CSOURCE(ev); - u16 devid = _GET_DEVID(ev); - u8 bank = _GET_BANK(ev); - u8 cntr = _GET_CNTR(ev); + struct hw_perf_event *hwc = &ev->hw; + u8 bank = hwc->iommu_bank; + u8 cntr = hwc->iommu_cntr; u64 reg = 0ULL; - reg = csource; + reg = GET_CSOURCE(hwc); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg); - reg = devid | (_GET_DEVID_MASK(ev) << 32); + reg = GET_DEVID_MASK(hwc); + reg = GET_DEVID(hwc) | (reg << 32); if (reg) reg |= BIT(31);
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg); - reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); + reg = GET_PASID_MASK(hwc); + reg = GET_PASID(hwc) | (reg << 32); if (reg) reg |= BIT(31); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg); - reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); + reg = GET_DOMID_MASK(hwc); + reg = GET_DOMID(hwc) | (reg << 32); if (reg) reg |= BIT(31); amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg); @@ -275,16 +266,16 @@ static void perf_iommu_enable_event(struct perf_event *ev) static void perf_iommu_disable_event(struct perf_event *event) { struct amd_iommu *iommu = get_amd_iommu(0); + struct hw_perf_event *hwc = &event->hw; u64 reg = 0ULL; - amd_iommu_pc_set_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_SRC_REG, &reg); } static void perf_iommu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - struct amd_iommu *iommu = get_amd_iommu(0); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -293,8 +284,10 @@ static void perf_iommu_start(struct perf_event *event, int flags) hwc->state = 0; if (flags & PERF_EF_RELOAD) { - u64 prev_raw_count = local64_read(&hwc->prev_count); - amd_iommu_pc_set_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + u64 prev_raw_count = local64_read(&hwc->prev_count); + struct amd_iommu *iommu = get_amd_iommu(0); + + amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_REG, &prev_raw_count); } @@ -309,7 +302,7 @@ static void perf_iommu_read(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; struct amd_iommu *iommu = get_amd_iommu(0); - if (amd_iommu_pc_get_reg(iommu, _GET_BANK(event), _GET_CNTR(event), + if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, IOMMU_PC_COUNTER_REG, &count)) return; @@ -329,7 +322,6 @@ static void perf_iommu_read(struct perf_event *event)
static void perf_iommu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - u64 config; if (hwc->state & PERF_HES_UPTODATE) return; @@ -341,7 +333,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; - config = hwc->config; perf_iommu_read(event); hwc->state |= PERF_HES_UPTODATE; } @@ -349,16 +340,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags) static int perf_iommu_add(struct perf_event *event, int flags) { int retval; - struct perf_amd_iommu *perf_iommu = - container_of(event->pmu, struct perf_amd_iommu, pmu); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; /* request an iommu bank/counter */ - retval = get_next_avail_iommu_bnk_cntr(perf_iommu); - if (retval != -ENOSPC) - event->hw.extra_reg.reg = (u16)retval; - else + retval = get_next_avail_iommu_bnk_cntr(event); + if (retval) return retval; if (flags & PERF_EF_START) @@ -369,6 +356,7 @@ static int perf_iommu_add(struct perf_event *event, int flags) static void perf_iommu_del(struct perf_event *event, int flags) { + struct hw_perf_event *hwc = &event->hw; struct perf_amd_iommu *perf_iommu = container_of(event->pmu, struct perf_amd_iommu, pmu); @@ -376,8 +364,7 @@ static void perf_iommu_del(struct perf_event *event, int flags) /* clear the assigned iommu bank/counter */ clear_avail_iommu_bnk_cntr(perf_iommu, - _GET_BANK(event), - _GET_CNTR(event)); + hwc->iommu_bank, hwc->iommu_cntr); perf_event_update_userpage(event); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b6e75c9d4791..24a635887f28 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -165,6 +165,13 @@ struct hw_perf_event { struct list_head bp_list; }; #endif + struct { /* amd_iommu */ + u8 iommu_bank; + u8 iommu_cntr; + u16 padding; + u64 conf; + u64 conf1; + }; }; /* * If the event is a per task event, this will point to the task in -- cgit v1.2.3