Diffstat (limited to 'tools/perf/util')
55 files changed, 1378 insertions, 729 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 79b9498886a2..918b501f9bd8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -154,7 +154,10 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o + +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o +endif ifeq ($(CONFIG_LIBTRACEEVENT),y) perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 091987dd3966..40dcedfd75cd 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -68,7 +68,11 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Clean highest byte */ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); } else { - pr_err("unsupported address packet index: 0x%x\n", index); + static u32 seen_idx = 0; + if (!(seen_idx & BIT(index))) { + seen_idx |= BIT(index); + pr_warning("ignoring unsupported address packet index: 0x%x\n", index); + } } return payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 2f311189c6e8..fed4741f372e 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -422,16 +422,18 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, int ch, pat; u64 payload = packet->payload; int err = 0; + static const char *idx_name[] = {"PC", "TGT", "VA", "PA", "PBT"}; switch (idx) { case SPE_ADDR_PKT_HDR_INDEX_INS: case SPE_ADDR_PKT_HDR_INDEX_BRANCH: + case SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH: ns = !!SPE_ADDR_PKT_GET_NS(payload); el = SPE_ADDR_PKT_GET_EL(payload); payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s 0x%llx el%d ns=%d", - (idx == 1) ? 
"TGT" : "PC", payload, el, ns); + idx_name[idx], payload, el, ns); break; case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT: arm_spe_pkt_out_string(&err, &buf, &buf_len, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 9b970e7bf1e2..f75ed3a8a050 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -65,6 +65,7 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1 #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2 #define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3 +#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH 0x4 /* Address packet payload */ #define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56 diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c2e323cd7d49..498ff7f24463 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1133,6 +1133,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) if (auxtrace__dont_decode(session)) return 0; + if (perf_data__is_pipe(session->data)) + return 0; + if (!session->auxtrace || !session->auxtrace->queue_data) return -EINVAL; @@ -1391,6 +1394,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->calls = true; } else { synth_opts->instructions = true; + synth_opts->cycles = true; synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; } @@ -1479,7 +1483,11 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, for (p = str; *p;) { switch (*p++) { case 'i': - synth_opts->instructions = true; + case 'y': + if (p[-1] == 'y') + synth_opts->cycles = true; + else + synth_opts->instructions = true; while (*p == ' ' || *p == ',') p += 1; if (isdigit(*p)) { @@ -1638,7 +1646,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, } } out: - if (synth_opts->instructions) { + if (synth_opts->instructions || synth_opts->cycles) { if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 2cf63d377831..29eb82dff574 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -71,6 +71,9 @@ enum itrace_period_type { * @inject: indicates the event (not just the sample) must be fully synthesized * because 'perf inject' will write it out * @instructions: whether to synthesize 'instructions' events + * @cycles: whether to synthesize 'cycles' events + * (not fully accurate, since CYC packets are only emitted + * together with other events, such as branches) * @branches: whether to synthesize 'branches' events * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions @@ -119,6 +122,7 @@ struct itrace_synth_opts { bool default_no_sample; bool inject; bool instructions; + bool cycles; bool branches; bool transactions; bool ptwrites; @@ -643,6 +647,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ +" y[period]: synthesize cycles events (same period as i)\n" \ " b: synthesize branches events (branch misses for Arm SPE)\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ @@ -674,7 +679,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " 
PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ -" concatenate multiple options. Default is ibxwpe or cewp\n" +" concatenate multiple options. Default is iybxwpe or cewp\n" static inline void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts, diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 0236334fd69b..fadcacb9d501 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -34,13 +34,15 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); - if (con->aggr_mode == LOCK_AGGR_TASK) { + if (con->aggr_mode == LOCK_AGGR_TASK) bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries); - bpf_map__set_max_entries(skel->maps.stacks, 1); - } else { + else bpf_map__set_max_entries(skel->maps.task_data, 1); + + if (con->save_callstack) bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); - } + else + bpf_map__set_max_entries(skel->maps.stacks, 1); if (target__has_cpu(target)) ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); @@ -146,6 +148,8 @@ int lock_contention_prepare(struct lock_contention *con) /* these don't work well if in the rodata section */ skel->bss->stack_skip = con->stack_skip; skel->bss->aggr_mode = con->aggr_mode; + skel->bss->needs_callstack = con->save_callstack; + skel->bss->lock_owner = con->owner; lock_contention_bpf__attach(skel); return 0; @@ -163,9 +167,70 @@ int lock_contention_stop(void) return 0; } +static const char *lock_contention_get_name(struct lock_contention *con, + struct contention_key *key, + u64 *stack_trace) +{ + int idx = 0; + u64 addr; + const char *name = ""; + static char name_buf[KSYM_NAME_LEN]; + struct symbol *sym; + struct map *kmap; + struct machine *machine = con->machine; + + if (con->aggr_mode == LOCK_AGGR_TASK) { + struct contention_task_data task; + int pid = key->pid; + int task_fd = bpf_map__fd(skel->maps.task_data); + + /* do not update idle comm which contains CPU number */ + if (pid) { + struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid); + + if (t == NULL) + return name; + if (!bpf_map_lookup_elem(task_fd, &pid, &task) && + thread__set_comm(t, task.comm, /*timestamp=*/0)) + name = task.comm; + } + return name; + } + + if (con->aggr_mode == LOCK_AGGR_ADDR) { + sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap); + if (sym) + name = sym->name; + return name; + } + + /* LOCK_AGGR_CALLER: skip lock internal functions */ + while (machine__is_lock_function(machine, stack_trace[idx]) && + idx < con->max_stack - 1) + idx++; + + addr = stack_trace[idx]; + sym = machine__find_kernel_symbol(machine, addr, &kmap); + + if (sym) { + unsigned long offset; + + offset = kmap->map_ip(kmap, addr) - sym->start; + + if (offset == 0) + return sym->name; + + snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset); + } else { + snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr); + } + + return name_buf; +} + int lock_contention_read(struct lock_contention *con) { - int fd, stack, task_fd, err = 0; + int fd, stack, err = 0; struct contention_key *prev_key, key; struct contention_data data = {}; struct lock_stat *st = NULL; @@ -175,7 +240,6 @@ int lock_contention_read(struct lock_contention *con) fd = bpf_map__fd(skel->maps.lock_stat); stack = bpf_map__fd(skel->maps.stacks); - task_fd = bpf_map__fd(skel->maps.task_data); 
con->lost = skel->bss->lost; @@ -195,98 +259,75 @@ int lock_contention_read(struct lock_contention *con) prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { - struct map *kmap; - struct symbol *sym; - int idx = 0; - s32 stack_id; + s64 ls_key; + const char *name; /* to handle errors in the loop body */ err = -1; bpf_map_lookup_elem(fd, &key, &data); - st = zalloc(sizeof(*st)); - if (st == NULL) - break; - - st->nr_contended = data.count; - st->wait_time_total = data.total_time; - st->wait_time_max = data.max_time; - st->wait_time_min = data.min_time; - - if (data.count) - st->avg_wait_time = data.total_time / data.count; - - st->flags = data.flags; - st->addr = key.aggr_key; + if (con->save_callstack) { + bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); - if (con->aggr_mode == LOCK_AGGR_TASK) { - struct contention_task_data task; - struct thread *t; - int pid = key.aggr_key; + if (!match_callstack_filter(machine, stack_trace)) + goto next; + } - /* do not update idle comm which contains CPU number */ - if (st->addr) { - bpf_map_lookup_elem(task_fd, &pid, &task); - t = __machine__findnew_thread(machine, /*pid=*/-1, pid); - thread__set_comm(t, task.comm, /*timestamp=*/0); - } + switch (con->aggr_mode) { + case LOCK_AGGR_CALLER: + ls_key = key.stack_id; + break; + case LOCK_AGGR_TASK: + ls_key = key.pid; + break; + case LOCK_AGGR_ADDR: + ls_key = key.lock_addr; + break; + default: goto next; } - if (con->aggr_mode == LOCK_AGGR_ADDR) { - sym = machine__find_kernel_symbol(machine, st->addr, &kmap); - if (sym) - st->name = strdup(sym->name); + st = lock_stat_find(ls_key); + if (st != NULL) { + st->wait_time_total += data.total_time; + if (st->wait_time_max < data.max_time) + st->wait_time_max = data.max_time; + if (st->wait_time_min > data.min_time) + st->wait_time_min = data.min_time; + + st->nr_contended += data.count; + if (st->nr_contended) + st->avg_wait_time = st->wait_time_total / st->nr_contended; goto next; } - stack_id = key.aggr_key; - bpf_map_lookup_elem(stack, &stack_id, stack_trace); - - /* skip lock internal functions */ - while (machine__is_lock_function(machine, stack_trace[idx]) && - idx < con->max_stack - 1) - idx++; - - st->addr = stack_trace[idx]; - sym = machine__find_kernel_symbol(machine, st->addr, &kmap); - - if (sym) { - unsigned long offset; - int ret = 0; - - offset = kmap->map_ip(kmap, st->addr) - sym->start; + name = lock_contention_get_name(con, &key, stack_trace); + st = lock_stat_findnew(ls_key, name, data.flags); + if (st == NULL) + break; - if (offset) - ret = asprintf(&st->name, "%s+%#lx", sym->name, offset); - else - st->name = strdup(sym->name); + st->nr_contended = data.count; + st->wait_time_total = data.total_time; + st->wait_time_max = data.max_time; + st->wait_time_min = data.min_time; - if (ret < 0 || st->name == NULL) - break; - } else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) { - break; - } + if (data.count) + st->avg_wait_time = data.total_time / data.count; - if (verbose > 0) { + if (con->save_callstack) { st->callstack = memdup(stack_trace, stack_size); if (st->callstack == NULL) break; } + next: - hlist_add_head(&st->hash_entry, con->result); prev_key = &key; - /* we're fine now, reset the values */ - st = NULL; + /* we're fine now, reset the error */ err = 0; } free(stack_trace); - if (st) { - free(st->name); - free(st); - } return err; } diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index ad0ca5d50557..e6007eaeda1a 100644 --- 
a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -10,6 +10,14 @@ /* default buffer size */ #define MAX_ENTRIES 10240 +/* lock contention flags from include/trace/events/lock.h */ +#define LCB_F_SPIN (1U << 0) +#define LCB_F_READ (1U << 1) +#define LCB_F_WRITE (1U << 2) +#define LCB_F_RT (1U << 3) +#define LCB_F_PERCPU (1U << 4) +#define LCB_F_MUTEX (1U << 5) + struct tstamp_data { __u64 timestamp; __u64 lock; @@ -76,13 +84,23 @@ struct { __uint(max_entries, 1); } addr_filter SEC(".maps"); +struct rw_semaphore___old { + struct task_struct *owner; +} __attribute__((preserve_access_index)); + +struct rw_semaphore___new { + atomic_long_t owner; +} __attribute__((preserve_access_index)); + /* control flags */ int enabled; int has_cpu; int has_task; int has_type; int has_addr; +int needs_callstack; int stack_skip; +int lock_owner; /* determine the key of lock stat */ int aggr_mode; @@ -131,17 +149,59 @@ static inline int can_record(u64 *ctx) return 1; } -static inline void update_task_data(__u32 pid) +static inline int update_task_data(struct task_struct *task) { struct contention_task_data *p; + int pid, err; + + err = bpf_core_read(&pid, sizeof(pid), &task->pid); + if (err) + return -1; p = bpf_map_lookup_elem(&task_data, &pid); if (p == NULL) { - struct contention_task_data data; + struct contention_task_data data = {}; - bpf_get_current_comm(data.comm, sizeof(data.comm)); + BPF_CORE_READ_STR_INTO(&data.comm, task, comm); bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST); } + + return 0; +} + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags) +{ + struct task_struct *task; + __u64 owner = 0; + + if (flags & LCB_F_MUTEX) { + struct mutex *mutex = (void *)lock; + owner = BPF_CORE_READ(mutex, owner.counter); + } else if (flags == LCB_F_READ || flags == LCB_F_WRITE) { +#if __has_builtin(bpf_core_type_matches) + if (bpf_core_type_matches(struct rw_semaphore___old)) { + struct rw_semaphore___old *rwsem = (void *)lock; + owner = (unsigned long)BPF_CORE_READ(rwsem, owner); + } else if (bpf_core_type_matches(struct rw_semaphore___new)) { + struct rw_semaphore___new *rwsem = (void *)lock; + owner = BPF_CORE_READ(rwsem, owner.counter); + } +#else + /* assume new struct */ + struct rw_semaphore *rwsem = (void *)lock; + owner = BPF_CORE_READ(rwsem, owner.counter); +#endif + } + + if (!owner) + return NULL; + + task = (void *)(owner & ~7UL); + return task; } SEC("tp_btf/contention_begin") @@ -173,11 +233,31 @@ int contention_begin(u64 *ctx) pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; - if (aggr_mode == LOCK_AGGR_CALLER) { + if (needs_callstack) { pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) lost++; + } else if (aggr_mode == LOCK_AGGR_TASK) { + struct task_struct *task; + + if (lock_owner) { + task = get_lock_owner(pelem->lock, pelem->flags); + + /* The flags is not used anymore. Pass the owner pid. 
*/ + if (task) + pelem->flags = BPF_CORE_READ(task, pid); + else + pelem->flags = -1U; + + } else { + task = bpf_get_current_task_btf(); + } + + if (task) { + if (update_task_data(task) < 0 && lock_owner) + pelem->flags = -1U; + } } return 0; @@ -188,7 +268,7 @@ int contention_end(u64 *ctx) { __u32 pid; struct tstamp_data *pelem; - struct contention_key key; + struct contention_key key = {}; struct contention_data *data; __u64 duration; @@ -204,14 +284,20 @@ int contention_end(u64 *ctx) switch (aggr_mode) { case LOCK_AGGR_CALLER: - key.aggr_key = pelem->stack_id; + key.stack_id = pelem->stack_id; break; case LOCK_AGGR_TASK: - key.aggr_key = pid; - update_task_data(pid); + if (lock_owner) + key.pid = pelem->flags; + else + key.pid = pid; + if (needs_callstack) + key.stack_id = pelem->stack_id; break; case LOCK_AGGR_ADDR: - key.aggr_key = pelem->lock; + key.lock_addr = pelem->lock; + if (needs_callstack) + key.stack_id = pelem->stack_id; break; default: /* should not happen */ diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index ce71cf1a7e1e..3d35fd4407ac 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -4,7 +4,9 @@ #define UTIL_BPF_SKEL_LOCK_DATA_H struct contention_key { - u64 aggr_key; /* can be stack_id, pid or lock addr */ + u32 stack_id; + u32 pid; + u64 lock_addr; }; #define TASK_COMM_LEN 16 diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 6d38238481d3..378f16a24751 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -212,3 +212,18 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size) return printed; } + +const char *branch_spec_desc(int spec) +{ + const char *branch_spec_outcomes[PERF_BR_SPEC_MAX] = { + "N/A", + "SPEC_WRONG_PATH", + "NON_SPEC_CORRECT_PATH", + "SPEC_CORRECT_PATH", + }; + + if (spec >= 0 && spec < PERF_BR_SPEC_MAX) + return branch_spec_outcomes[spec]; + + return NULL; +} diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 3ed792db1125..e41bfffe2217 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -89,4 +89,6 @@ const char *get_branch_type(struct branch_entry *e); void branch_type_stat_display(FILE *fp, struct branch_type_stat *st); int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize); +const char *branch_spec_desc(int spec); + #endif /* _PERF_BRANCH_H */ diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h index dec8c0fb1f4a..fe6d5b60a031 100644 --- a/tools/perf/util/cacheline.h +++ b/tools/perf/util/cacheline.h @@ -6,16 +6,31 @@ int __pure cacheline_size(void); -static inline u64 cl_address(u64 address) + +/* + * Some architectures have 'Adjacent Cacheline Prefetch' feature, + * which performs like the cacheline size being doubled. 
+ */ +static inline u64 cl_address(u64 address, bool double_cl) { + u64 size = cacheline_size(); + + if (double_cl) + size *= 2; + /* return the cacheline of the address */ - return (address & ~(cacheline_size() - 1)); + return (address & ~(size - 1)); } -static inline u64 cl_offset(u64 address) +static inline u64 cl_offset(u64 address, bool double_cl) { - /* return the cacheline of the address */ - return (address & (cacheline_size() - 1)); + u64 size = cacheline_size(); + + if (double_cl) + size *= 2; + + /* return the offset inside cacheline */ + return (address & (size - 1)); } #endif // PERF_CACHELINE_H diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index cd978c240e0d..bfb13306d82c 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -481,7 +481,6 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, nr_cgroups++; if (metric_events) { - perf_stat__collect_metric_expr(tmp_list); if (metricgroup__copy_metric_events(tmp_list, cgrp, metric_events, &orig_metric_events) < 0) diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 1a3ff6449158..e08797c3cdbc 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -422,8 +422,6 @@ void numa_topology__delete(struct numa_topology *tp) static int load_hybrid_node(struct hybrid_topology_node *node, struct perf_pmu *pmu) { - const char *sysfs; - char path[PATH_MAX]; char *buf = NULL, *p; FILE *fp; size_t len = 0; @@ -432,12 +430,7 @@ static int load_hybrid_node(struct hybrid_topology_node *node, if (!node->pmu_name) return -1; - sysfs = sysfs__mountpoint(); - if (!sysfs) - goto err; - - snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name); - fp = fopen(path, "r"); + fp = perf_pmu__open_file(pmu, "cpus"); if (!fp) goto err; diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c index 597542410854..5f48b756c4cf 100644 --- a/tools/perf/util/cs-etm-base.c +++ b/tools/perf/util/cs-etm-base.c @@ -36,7 +36,22 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", - [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" + [CS_ETMV4_TS_SOURCE] = " TS_SOURCE %lld\n", +}; + +static const char * const cs_ete_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", + [CS_ETE_TRCCONFIGR] = " TRCCONFIGR %llx\n", + [CS_ETE_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETE_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETE_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETE_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n", + [CS_ETE_TS_SOURCE] = " TS_SOURCE %lld\n", }; static const char * const param_unk_fmt = @@ -96,19 +111,22 @@ static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset) else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); } - } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) { - /* - * ETE and ETMv4 can be printed in the same block because the number of parameters - * is saved and they share the list of parameter names. ETE is also only supported - * in V1 files. 
- */ + } else if (magic == __perf_cs_etmv4_magic) { for (j = 0; j < total_params; j++, i++) { /* if newer record - could be excess params */ - if (j >= CS_ETE_PRIV_MAX) + if (j >= CS_ETMV4_PRIV_MAX) fprintf(stdout, param_unk_fmt, j, val[i]); else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); } + } else if (magic == __perf_cs_ete_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETE_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else + fprintf(stdout, cs_ete_priv_fmts[j], val[i]); + } } else { /* failure - note bad magic value and error out */ fprintf(stdout, magic_unk_fmt, magic); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 31fa3b45134a..d0e521dfcf35 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -30,6 +30,15 @@ #endif #endif +/* + * Assume a maximum of 0.1ns elapsed per instruction. This would be the + * case with a theoretical 10GHz core executing 1 instruction per cycle. + * Used to estimate the sample time for synthesized instructions because + * Coresight only emits a timestamp for a range of instructions rather + * than per instruction. + */ +const u32 INSTR_PER_NS = 10; + struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); @@ -112,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, return 1; } +/* + * Calculate the number of nanoseconds elapsed. + * + * instr_count is updated in place with the remainder of the instructions + * which didn't make up a whole nanosecond. + */ +static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count) +{ + const u32 instr_copy = *instr_count; + + *instr_count %= INSTR_PER_NS; + return instr_copy / INSTR_PER_NS; +} + static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, ocsd_etmv3_cfg *config) { @@ -260,15 +283,17 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, struct cs_etm_packet_queue *packet_queue, const uint8_t trace_chan_id) { + u64 estimated_ts; + /* No timestamp packet has been received, nothing to do */ - if (!packet_queue->cs_timestamp) + if (!packet_queue->next_cs_timestamp) return OCSD_RESP_CONT; - packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; + estimated_ts = packet_queue->cs_timestamp + + cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count); - /* Estimate the timestamp for the next range packet */ - packet_queue->next_cs_timestamp += packet_queue->instr_count; - packet_queue->instr_count = 0; + /* Estimated TS can never be higher than the next real one in the trace */ + packet_queue->cs_timestamp = min(packet_queue->next_cs_timestamp, estimated_ts); /* Tell the front end which traceid_queue needs attention */ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); @@ -283,6 +308,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, const ocsd_trc_index_t indx) { struct cs_etm_packet_queue *packet_queue; + u64 converted_timestamp; + u64 estimated_first_ts; /* First get the packet queue for this traceID */ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); @@ -290,17 +317,28 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, return OCSD_RESP_FATAL_SYS_ERR; /* + * Coresight timestamps are raw timer values which need to be scaled to ns. Assume + * 0 is a bad value so don't try to convert it. + */ + converted_timestamp = elem->timestamp ? 
+ cs_etm__convert_sample_time(etmq, elem->timestamp) : 0; + + /* * We've seen a timestamp packet before - simply record the new value. * Function do_soft_timestamp() will report the value to the front end, * hence asking the decoder to keep decoding rather than stopping. */ - if (packet_queue->cs_timestamp) { - packet_queue->next_cs_timestamp = elem->timestamp; + if (packet_queue->next_cs_timestamp) { + /* + * What was next is now where new ranges start from, overwriting + * any previous estimate in cs_timestamp + */ + packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; + packet_queue->next_cs_timestamp = converted_timestamp; return OCSD_RESP_CONT; } - - if (!elem->timestamp) { + if (!converted_timestamp) { /* * Zero timestamps can be seen due to misconfiguration or hardware bugs. * Warn once, and don't try to subtract instr_count as it would result in an @@ -312,7 +350,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n", indx); - } else if (packet_queue->instr_count > elem->timestamp) { + } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) { /* * Sanity check that the elem->timestamp - packet_queue->instr_count would not * result in an underflow. Warn and clamp at 0 if it would. @@ -325,11 +363,14 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * or a discontinuity. Since timestamps packets are generated *after* * range packets have been generated, we need to estimate the time at * which instructions started by subtracting the number of instructions - * executed to the timestamp. + * executed to the timestamp. Don't estimate earlier than the last used + * timestamp though. */ - packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + estimated_first_ts = converted_timestamp - + (packet_queue->instr_count / INSTR_PER_NS); + packet_queue->cs_timestamp = max(packet_queue->cs_timestamp, estimated_first_ts); } - packet_queue->next_cs_timestamp = elem->timestamp; + packet_queue->next_cs_timestamp = converted_timestamp; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -342,7 +383,6 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, static void cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) { - packet_queue->cs_timestamp = 0; packet_queue->next_cs_timestamp = 0; packet_queue->instr_count = 0; } @@ -604,6 +644,9 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_CUSTOM: case OCSD_GEN_TRC_ELEM_SYNC_MARKER: case OCSD_GEN_TRC_ELEM_MEMTRANS: +#if (OCSD_VER_NUM >= 0x010400) + case OCSD_GEN_TRC_ELEM_INSTRUMENTATION: +#endif default: break; } diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 33303d03c2fa..f65bac5ddbdb 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -35,6 +35,7 @@ #include "tool.h" #include "thread.h" #include "thread-stack.h" +#include "tsc.h" #include <tools/libc_compat.h> #include "util/synthetic-events.h" @@ -46,10 +47,12 @@ struct cs_etm_auxtrace { struct perf_session *session; struct machine *machine; struct thread *unknown_thread; + struct perf_tsc_conversion tc; u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; + u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. 
*/ int num_cpu; u64 latest_kernel_timestamp; @@ -464,12 +467,12 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, u64 **metadata = etm->metadata; t_params[idx].protocol = CS_ETM_PROTO_ETE; - t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; - t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; - t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; - t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; - t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; - t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR]; t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; } @@ -1161,6 +1164,30 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq, sample->insn_len, (void *)sample->insn); } +u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + + if (etm->has_virtual_ts) + return tsc_to_perf_time(cs_timestamp, &etm->tc); + else + return cs_timestamp; +} + +static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; + + if (etm->timeless_decoding) + return 0; + else if (etm->has_virtual_ts) + return packet_queue->cs_timestamp; + else + return etm->latest_kernel_timestamp; +} + static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) @@ -1174,8 +1201,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); - if (!etm->timeless_decoding) - sample.time = etm->latest_kernel_timestamp; + /* Set time field based on etm auxtrace config. */ + sample.time = cs_etm__resolve_sample_time(etmq, tidq); + sample.ip = addr; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1232,8 +1260,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); - if (!etm->timeless_decoding) - sample.time = etm->latest_kernel_timestamp; + /* Set time field based on etm auxtrace config. */ + sample.time = cs_etm__resolve_sample_time(etmq, tidq); + sample.ip = ip; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -2746,12 +2775,42 @@ static int cs_etm__queue_aux_records(struct perf_session *session) return 0; } +#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ + (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) + +/* + * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual + * timestamps). 
+ */ +static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) +{ + int j; + + for (j = 0; j < num_cpu; j++) { + switch (metadata[j][CS_ETM_MAGIC]) { + case __perf_cs_etmv4_magic: + if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) + return false; + break; + case __perf_cs_ete_magic: + if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) + return false; + break; + default: + /* Unknown / unsupported magic number. */ + return false; + } + } + return true; +} + int cs_etm__process_auxtrace_info_full(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; struct cs_etm_auxtrace *etm = NULL; struct int_node *inode; + struct perf_record_time_conv *tc = &session->time_conv; int event_header_size = sizeof(struct perf_event_header); int total_size = auxtrace_info->header.size; int priv_size = 0; @@ -2886,6 +2945,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->auxtrace_type = auxtrace_info->type; etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); + /* Use virtual timestamps if all ETMs report ts_source = 1 */ + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); + + if (!etm->has_virtual_ts) + ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" + "The time field of the samples will not be set accurately.\n\n"); + etm->auxtrace.process_event = cs_etm__process_event; etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; etm->auxtrace.flush_events = cs_etm__flush_events; @@ -2915,6 +2981,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, goto err_delete_thread; } + etm->tc.time_shift = tc->time_shift; + etm->tc.time_mult = tc->time_mult; + etm->tc.time_zero = tc->time_zero; + if (event_contains(*tc, time_cycles)) { + etm->tc.time_cycles = tc->time_cycles; + etm->tc.time_mask = tc->time_mask; + etm->tc.cap_user_time_zero = tc->cap_user_time_zero; + etm->tc.cap_user_time_short = tc->cap_user_time_short; + } err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5da50d5dae6b..98a4f7113d2f 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -71,6 +71,7 @@ enum { CS_ETMV4_TRCIDR2, CS_ETMV4_TRCIDR8, CS_ETMV4_TRCAUTHSTATUS, + CS_ETMV4_TS_SOURCE, CS_ETMV4_PRIV_MAX, }; @@ -82,7 +83,17 @@ enum { * added in header V1 */ enum { - CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX, + /* Dynamic, configurable parameters */ + CS_ETE_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, + CS_ETE_TRCTRACEIDR, + /* RO, taken from sysFS */ + CS_ETE_TRCIDR0, + CS_ETE_TRCIDR1, + CS_ETE_TRCIDR2, + CS_ETE_TRCIDR8, + CS_ETE_TRCAUTHSTATUS, + CS_ETE_TRCDEVARCH, + CS_ETE_TS_SOURCE, CS_ETE_PRIV_MAX }; @@ -181,7 +192,7 @@ struct cs_etm_packet_queue { u32 head; u32 tail; u32 instr_count; - u64 cs_timestamp; + u64 cs_timestamp; /* Timestamp from trace data, converted to ns if possible */ u64 next_cs_timestamp; struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; }; @@ -220,6 +231,7 @@ struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused, struct perf_session *session __maybe_unused); +u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp); #else static inline int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused, diff --git 
a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index b842273458b8..2b732bccabad 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -322,10 +322,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - if (flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(flags)) offset += fmtf->offset + fmtf->size; -#endif } if (flags & TEP_FIELD_IS_ARRAY) { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 190e818a0717..88378c4c5dd9 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -19,12 +19,19 @@ #include "debug.h" #include "print_binary.h" #include "target.h" +#include "trace-event.h" #include "ui/helpline.h" #include "ui/ui.h" #include "util/parse-sublevel-options.h" #include <linux/ctype.h> +#ifdef HAVE_LIBTRACEEVENT +#include <traceevent/event-parse.h> +#else +#define LIBTRACEEVENT_VERSION 0 +#endif + int verbose; int debug_peo_args; bool dump_trace = false, quiet = false; @@ -228,6 +235,14 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose; +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) + if (verbose == 1) + tep_set_loglevel(TEP_LOG_INFO); + else if (verbose == 2) + tep_set_loglevel(TEP_LOG_DEBUG); + else if (verbose >= 3) + tep_set_loglevel(TEP_LOG_ALL); +#endif return 0; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 999dd1700502..51e8ce6edddc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -285,8 +285,6 @@ void evsel__init(struct evsel *evsel, evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; - evsel->metric_expr = NULL; - evsel->metric_name = NULL; evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; @@ -2319,7 +2317,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) * abort:1 //transaction abort * cycles:16 //cycle count to last branch * type:4 //branch type - * reserved:40 + * spec:2 //branch speculation info + * new_type:4 //additional branch type + * priv:3 //privilege level + * reserved:31 * } * } * @@ -2335,7 +2336,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) new_val |= bitfield_swap(value, 3, 1); new_val |= bitfield_swap(value, 4, 16); new_val |= bitfield_swap(value, 20, 4); - new_val |= bitfield_swap(value, 24, 40); + new_val |= bitfield_swap(value, 24, 2); + new_val |= bitfield_swap(value, 26, 4); + new_val |= bitfield_swap(value, 30, 3); + new_val |= bitfield_swap(value, 33, 31); } else { new_val = bitfield_swap(value, 63, 1); new_val |= bitfield_swap(value, 62, 1); @@ -2343,7 +2347,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) new_val |= bitfield_swap(value, 60, 1); new_val |= bitfield_swap(value, 44, 16); new_val |= bitfield_swap(value, 40, 4); - new_val |= bitfield_swap(value, 0, 40); + new_val |= bitfield_swap(value, 38, 2); + new_val |= bitfield_swap(value, 34, 4); + new_val |= bitfield_swap(value, 31, 3); + new_val |= bitfield_swap(value, 0, 31); } return new_val; @@ -2784,10 +2791,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - if (field->flags & 
TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } return sample->raw_data + offset; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d572be41b960..24cb807ef6ce 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -105,8 +105,6 @@ struct evsel { * metric fields are similar, but needs more care as they can have * references to other metric (evsel). */ - const char * metric_expr; - const char * metric_name; struct evsel **metric_events; struct evsel *metric_leader; diff --git a/tools/perf/util/evswitch.h b/tools/perf/util/evswitch.h index fd30460b6218..8ffdbe526d98 100644 --- a/tools/perf/util/evswitch.h +++ b/tools/perf/util/evswitch.h @@ -22,9 +22,9 @@ bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel); #define OPTS_EVSWITCH(evswitch) \ OPT_STRING(0, "switch-on", &(evswitch)->on_name, \ - "event", "Consider events after the ocurrence of this event"), \ + "event", "Consider events after the occurrence of this event"), \ OPT_STRING(0, "switch-off", &(evswitch)->off_name, \ - "event", "Stop considering events after the ocurrence of this event"), \ + "event", "Stop considering events after the occurrence of this event"), \ OPT_BOOLEAN(0, "show-on-off-events", &(evswitch)->show_on_off_events, \ "Show the on/off switch events, used with --switch-on and --switch-off") diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 00dcde35e0d3..c1da20b868db 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -19,6 +19,7 @@ #include <linux/zalloc.h> #include <ctype.h> #include <math.h> +#include "pmu.h" #ifdef PARSER_DEBUG extern int expr_debug; @@ -448,6 +449,10 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx result = topology->core_cpus_lists; goto out; } + if (!strcmp("#slots", literal)) { + result = perf_pmu__cpu_slots_per_cycle(); + goto out; + } pr_err("Unrecognized literal '%s'", literal); out: diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 029271540fb0..eaa44b24c555 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -9,6 +9,7 @@ struct expr_scanner_ctx { char *user_requested_cpu_list; int runtime; bool system_wide; + bool is_test; }; struct expr_parse_ctx { diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index d47de5f270a8..4fbf353e78e7 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -87,9 +87,11 @@ static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx) YYSTYPE *yylval = expr_get_lval(scanner); yylval->num = expr__get_literal(expr_get_text(scanner), sctx); - if (isnan(yylval->num)) - return EXPR_ERROR; - + if (isnan(yylval->num)) { + if (!sctx->is_test) + return EXPR_ERROR; + yylval->num = 1; + } return LITERAL; } %} diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6d3921627e33..955c1b9dc6a4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -5,6 +5,7 @@ */ #include <inttypes.h> +#include <linux/perf_event.h> #include <stdio.h> #include <stdbool.h> #include <errno.h> @@ -98,6 +99,10 @@ struct intel_pt { u64 instructions_sample_type; u64 instructions_id; + bool sample_cycles; + u64 cycles_sample_type; + u64 cycles_id; + bool sample_branches; u32 branches_filter; u64 branches_sample_type; @@ -214,6 +219,8 @@ struct intel_pt_queue { u64 ipc_cyc_cnt; u64 last_in_insn_cnt; u64 last_in_cyc_cnt; + u64 last_cy_insn_cnt; + u64 last_cy_cyc_cnt; u64 last_br_insn_cnt; u64 last_br_cyc_cnt; 
unsigned int cbr_seen; @@ -1319,7 +1326,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; - if (pt->synth_opts.instructions) { + if (pt->synth_opts.instructions || pt->synth_opts.cycles) { if (pt->synth_opts.period) { switch (pt->synth_opts.period_type) { case PERF_ITRACE_PERIOD_INSTRUCTIONS: @@ -1830,6 +1837,33 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) pt->instructions_sample_type); } +static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + u64 period = 0; + + if (ptq->sample_ipc) + period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; + + if (!period || intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->cycles_id; + sample.stream_id = ptq->pt->cycles_id; + sample.period = period; + + sample.cyc_cnt = period; + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt; + ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; + + return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); +} + static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -2598,10 +2632,17 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) } } - if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { - err = intel_pt_synth_instruction_sample(ptq); - if (err) - return err; + if (state->type & INTEL_PT_INSTRUCTION) { + if (pt->sample_instructions) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + if (pt->sample_cycles) { + err = intel_pt_synth_cycle_sample(ptq); + if (err) + return err; + } } if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { @@ -3731,6 +3772,22 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } + if (pt->synth_opts.cycles) { + attr.config = PERF_COUNT_HW_CPU_CYCLES; + if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) + attr.sample_period = + intel_pt_ns_to_ticks(pt, pt->synth_opts.period); + else + attr.sample_period = pt->synth_opts.period; + err = intel_pt_synth_event(session, "cycles", &attr, id); + if (err) + return err; + pt->sample_cycles = true; + pt->cycles_sample_type = attr.sample_type; + pt->cycles_id = id; + id += 1; + } + attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; attr.sample_period = 1; @@ -4379,6 +4436,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_setup_pebs_events(pt); + if (perf_data__is_pipe(session->data)) { + pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n" + " The output cannot relied upon. In particular,\n" + " timestamps and the order of events may be incorrect.\n"); + } + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) err = auxtrace_queue_data(session, true, true); else diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 650ffe336f3a..4e8e243a6e4b 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -531,14 +531,37 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, pr_debug("llvm compiling command template: %s\n", template); + /* + * Below, substitute control characters for values that can cause the + * echo to misbehave, then substitute the values back. 
+ */ err = -ENOMEM; - if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0) + if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0) goto errout; +#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0) + for (char *p = command_echo; *p; p++) { + SWAP_CHAR('<', '\001'); + SWAP_CHAR('>', '\002'); + SWAP_CHAR('"', '\003'); + SWAP_CHAR('\'', '\004'); + SWAP_CHAR('|', '\005'); + SWAP_CHAR('&', '\006'); + SWAP_CHAR('\a', '"'); + } err = read_from_pipe(command_echo, (void **) &command_out, NULL); if (err) goto errout; + for (char *p = command_out; *p; p++) { + SWAP_CHAR('\001', '<'); + SWAP_CHAR('\002', '>'); + SWAP_CHAR('\003', '"'); + SWAP_CHAR('\004', '\''); + SWAP_CHAR('\005', '|'); + SWAP_CHAR('\006', '&'); + } +#undef SWAP_CHAR pr_debug("llvm compiling command : %s\n", command_out); err = read_from_pipe(template, &obj_buf, &obj_buf_sz); diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index b99e83fccf5c..040b618b2215 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -65,6 +65,11 @@ struct lock_stat { */ #define MAX_LOCK_DEPTH 48 +struct lock_stat *lock_stat_find(u64 addr); +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); + +bool match_callstack_filter(struct machine *machine, u64 *callstack); + /* * struct lock_seq_stat: * Place to put on state of one lock sequence @@ -128,6 +133,8 @@ struct lock_contention { int max_stack; int stack_skip; int aggr_mode; + int owner; + bool save_callstack; }; #ifdef HAVE_BPF_SKEL diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b9c273ed080a..f3559be95541 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -167,14 +167,14 @@ static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } -static bool metricgroup__has_constraint(const struct pmu_event *pe) +static bool metricgroup__has_constraint(const struct pmu_metric *pm) { - if (!pe->metric_constraint) + if (!pm->metric_constraint) return false; - if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + if (!strcmp(pm->metric_constraint, "NO_NMI_WATCHDOG") && sysctl__nmi_watchdog_enabled()) { - metricgroup___watchdog_constraint_hint(pe->metric_name, false); + metricgroup___watchdog_constraint_hint(pm->metric_name, false); return true; } @@ -193,7 +193,7 @@ static void metric__free(struct metric *m) free(m); } -static struct metric *metric__new(const struct pmu_event *pe, +static struct metric *metric__new(const struct pmu_metric *pm, const char *modifier, bool metric_no_group, int runtime, @@ -210,15 +210,15 @@ static struct metric *metric__new(const struct pmu_event *pe, if (!m->pctx) goto out_err; - m->metric_name = pe->metric_name; + m->metric_name = pm->metric_name; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); if (!m->modifier) goto out_err; } - m->metric_expr = pe->metric_expr; - m->metric_unit = pe->unit; + m->metric_expr = pm->metric_expr; + m->metric_unit = pm->unit; m->pctx->sctx.user_requested_cpu_list = NULL; if (user_requested_cpu_list) { m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list); @@ -227,7 +227,7 @@ static struct metric *metric__new(const struct pmu_event *pe, } m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; - m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + m->has_constraint = metric_no_group || metricgroup__has_constraint(pm); m->metric_refs 
= NULL; m->evlist = NULL; @@ -348,10 +348,10 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pe_metric(const struct pmu_event *pe, const char *metric) +static bool match_pm_metric(const struct pmu_metric *pm, const char *metric) { - return match_metric(pe->metric_group, metric) || - match_metric(pe->metric_name, metric); + return match_metric(pm->metric_group, metric) || + match_metric(pm->metric_name, metric); } /** struct mep - RB-tree node for building printing information. */ @@ -420,13 +420,13 @@ static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, return NULL; } -static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, +static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, struct rblist *groups) { const char *g; char *omg, *mg; - mg = strdup(pe->metric_group ?: "No_group"); + mg = strdup(pm->metric_group ?: "No_group"); if (!mg) return -ENOMEM; omg = mg; @@ -435,15 +435,15 @@ static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, g = skip_spaces(g); if (strlen(g)) - me = mep_lookup(groups, g, pe->metric_name); + me = mep_lookup(groups, g, pm->metric_name); else - me = mep_lookup(groups, "No_group", pe->metric_name); + me = mep_lookup(groups, "No_group", pm->metric_name); if (me) { - me->metric_desc = pe->desc; - me->metric_long_desc = pe->long_desc; - me->metric_expr = pe->metric_expr; - me->metric_unit = pe->unit; + me->metric_desc = pm->desc; + me->metric_long_desc = pm->long_desc; + me->metric_expr = pm->metric_expr; + me->metric_unit = pm->unit; } } free(omg); @@ -452,64 +452,61 @@ static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, } struct metricgroup_iter_data { - pmu_event_iter_fn fn; + pmu_metric_iter_fn fn; void *data; }; -static int metricgroup__sys_event_iter(const struct pmu_event *pe, - const struct pmu_events_table *table, +static int metricgroup__sys_event_iter(const struct pmu_metric *pm, + const struct pmu_metrics_table *table, void *data) { struct metricgroup_iter_data *d = data; struct perf_pmu *pmu = NULL; - if (!pe->metric_expr || !pe->compat) + if (!pm->metric_expr || !pm->compat) return 0; while ((pmu = perf_pmu__scan(pmu))) { - if (!pmu->id || strcmp(pmu->id, pe->compat)) + if (!pmu->id || strcmp(pmu->id, pm->compat)) continue; - return d->fn(pe, table, d->data); + return d->fn(pm, table, d->data); } return 0; } -static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, - void *vdata) +static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) { struct rblist *groups = vdata; - if (!pe->metric_name) - return 0; - - return metricgroup__add_to_mep_groups(pe, groups); + return metricgroup__add_to_mep_groups(pm, groups); } void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) { struct rblist groups; - const struct pmu_events_table *table; + const struct pmu_metrics_table *table; struct rb_node *node, *next; rblist__init(&groups); groups.node_new = mep_new; groups.node_cmp = mep_cmp; groups.node_delete = mep_delete; - table = pmu_events_table__find(); + table = pmu_metrics_table__find(); if (table) { - pmu_events_table_for_each_event(table, - metricgroup__add_to_mep_groups_callback, - &groups); + pmu_metrics_table_for_each_metric(table, + metricgroup__add_to_mep_groups_callback, + &groups); } { struct metricgroup_iter_data data = { .fn 
= metricgroup__add_to_mep_groups_callback, .data = &groups, }; - pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); } for (node = rb_first_cached(&groups.entries); node; node = next) { @@ -743,7 +740,7 @@ static int metricgroup__build_event_string(struct strbuf *events, #undef RETURN_IF_NON_ZERO } -int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) +int __weak arch_get_runtimeparam(const struct pmu_metric *pm __maybe_unused) { return 1; } @@ -768,22 +765,22 @@ struct metricgroup_add_iter_data { bool system_wide; struct metric *root_metric; const struct visited_metric *visited; - const struct pmu_events_table *table; + const struct pmu_metrics_table *table; }; static bool metricgroup__find_metric(const char *metric, - const struct pmu_events_table *table, - struct pmu_event *pe); + const struct pmu_metrics_table *table, + struct pmu_metric *pm); static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, + const struct pmu_metric *pm, const char *modifier, bool metric_no_group, const char *user_requested_cpu_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table); + const struct pmu_metrics_table *table); /** * resolve_metric - Locate metrics within the root metric and recursively add @@ -810,16 +807,16 @@ static int resolve_metric(struct list_head *metric_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { struct hashmap_entry *cur; size_t bkt; struct to_resolve { /* The metric to resolve. */ - struct pmu_event pe; + struct pmu_metric pm; /* * The key in the IDs map, this may differ from in case, - * etc. from pe->metric_name. + * etc. from pm->metric_name. */ const char *key; } *pending = NULL; @@ -830,15 +827,15 @@ static int resolve_metric(struct list_head *metric_list, * the pending array. */ hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { - struct pmu_event pe; + struct pmu_metric pm; - if (metricgroup__find_metric(cur->pkey, table, &pe)) { + if (metricgroup__find_metric(cur->pkey, table, &pm)) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) return -ENOMEM; - memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe)); + memcpy(&pending[pending_cnt].pm, &pm, sizeof(pm)); pending[pending_cnt].key = cur->pkey; pending_cnt++; } @@ -853,7 +850,7 @@ static int resolve_metric(struct list_head *metric_list, * context. */ for (i = 0; i < pending_cnt; i++) { - ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group, + ret = add_metric(metric_list, &pending[i].pm, modifier, metric_no_group, user_requested_cpu_list, system_wide, root_metric, visited, table); if (ret) @@ -867,7 +864,7 @@ static int resolve_metric(struct list_head *metric_list, /** * __add_metric - Add a metric to metric_list. * @metric_list: The list the metric is added to. - * @pe: The pmu_event containing the metric to be added. + * @pm: The pmu_metric containing the metric to be added. * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the @@ -884,7 +881,7 @@ static int resolve_metric(struct list_head *metric_list, * architecture perf is running upon. 
*/ static int __add_metric(struct list_head *metric_list, - const struct pmu_event *pe, + const struct pmu_metric *pm, const char *modifier, bool metric_no_group, int runtime, @@ -892,19 +889,19 @@ static int __add_metric(struct list_head *metric_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { const struct visited_metric *vm; int ret; bool is_root = !root_metric; struct visited_metric visited_node = { - .name = pe->metric_name, + .name = pm->metric_name, .parent = visited, }; for (vm = visited; vm; vm = vm->parent) { - if (!strcmp(pe->metric_name, vm->name)) { - pr_err("failed: recursion detected for %s\n", pe->metric_name); + if (!strcmp(pm->metric_name, vm->name)) { + pr_err("failed: recursion detected for %s\n", pm->metric_name); return -1; } } @@ -914,7 +911,7 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. */ - root_metric = metric__new(pe, modifier, metric_no_group, runtime, + root_metric = metric__new(pm, modifier, metric_no_group, runtime, user_requested_cpu_list, system_wide); if (!root_metric) return -ENOMEM; @@ -929,7 +926,7 @@ static int __add_metric(struct list_head *metric_list, */ if (root_metric->metric_refs) { for (; root_metric->metric_refs[cnt].metric_name; cnt++) { - if (!strcmp(pe->metric_name, + if (!strcmp(pm->metric_name, root_metric->metric_refs[cnt].metric_name)) return 0; } @@ -947,8 +944,8 @@ static int __add_metric(struct list_head *metric_list, * need to change them, so there's no need to create * our own copy. */ - root_metric->metric_refs[cnt].metric_name = pe->metric_name; - root_metric->metric_refs[cnt].metric_expr = pe->metric_expr; + root_metric->metric_refs[cnt].metric_name = pm->metric_name; + root_metric->metric_refs[cnt].metric_expr = pm->metric_expr; /* Null terminate array. */ root_metric->metric_refs[cnt+1].metric_name = NULL; @@ -959,7 +956,7 @@ static int __add_metric(struct list_head *metric_list, * For both the parent and referenced metrics, we parse * all the metric's IDs and add it to the root context. */ - if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) { + if (expr__find_ids(pm->metric_expr, NULL, root_metric->pctx) < 0) { /* Broken metric. 
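* The metric expression could not be parsed into IDs, so report
* -EINVAL rather than adding a partially resolved metric.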
*/ ret = -EINVAL; } else { @@ -981,57 +978,57 @@ static int __add_metric(struct list_head *metric_list, struct metricgroup__find_metric_data { const char *metric; - struct pmu_event *pe; + struct pmu_metric *pm; }; -static int metricgroup__find_metric_callback(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, +static int metricgroup__find_metric_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, void *vdata) { struct metricgroup__find_metric_data *data = vdata; - if (!match_metric(pe->metric_name, data->metric)) + if (!match_metric(pm->metric_name, data->metric)) return 0; - memcpy(data->pe, pe, sizeof(*pe)); + memcpy(data->pm, pm, sizeof(*pm)); return 1; } static bool metricgroup__find_metric(const char *metric, - const struct pmu_events_table *table, - struct pmu_event *pe) + const struct pmu_metrics_table *table, + struct pmu_metric *pm) { struct metricgroup__find_metric_data data = { .metric = metric, - .pe = pe, + .pm = pm, }; - return pmu_events_table_for_each_event(table, metricgroup__find_metric_callback, &data) + return pmu_metrics_table_for_each_metric(table, metricgroup__find_metric_callback, &data) ? true : false; } static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, + const struct pmu_metric *pm, const char *modifier, bool metric_no_group, const char *user_requested_cpu_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { int ret = 0; - pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + pr_debug("metric expr %s for %s\n", pm->metric_expr, pm->metric_name); - if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, + if (!strstr(pm->metric_expr, "?")) { + ret = __add_metric(metric_list, pm, modifier, metric_no_group, 0, user_requested_cpu_list, system_wide, root_metric, visited, table); } else { int j, count; - count = arch_get_runtimeparam(pe); + count = arch_get_runtimeparam(pm); /* This loop is added to create multiple * events depend on count value and add @@ -1039,7 +1036,7 @@ static int add_metric(struct list_head *metric_list, */ for (j = 0; j < count && !ret; j++) - ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, + ret = __add_metric(metric_list, pm, modifier, metric_no_group, j, user_requested_cpu_list, system_wide, root_metric, visited, table); } @@ -1047,17 +1044,17 @@ static int add_metric(struct list_head *metric_list, return ret; } -static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, - void *data) +static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *data) { struct metricgroup_add_iter_data *d = data; int ret; - if (!match_pe_metric(pe, d->metric_name)) + if (!match_pm_metric(pm, d->metric_name)) return 0; - ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, + ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, d->user_requested_cpu_list, d->system_wide, d->root_metric, d->visited, d->table); if (ret) @@ -1107,19 +1104,19 @@ struct metricgroup__add_metric_data { bool has_match; }; -static int metricgroup__add_metric_callback(const struct pmu_event *pe, - const struct pmu_events_table *table, +static int metricgroup__add_metric_callback(const struct 
pmu_metric *pm, + const struct pmu_metrics_table *table, void *vdata) { struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pe->metric_expr && - (match_metric(pe->metric_group, data->metric_name) || - match_metric(pe->metric_name, data->metric_name))) { + if (pm->metric_expr && + (match_metric(pm->metric_group, data->metric_name) || + match_metric(pm->metric_name, data->metric_name))) { data->has_match = true; - ret = add_metric(data->list, pe, data->modifier, data->metric_no_group, + ret = add_metric(data->list, pm, data->modifier, data->metric_no_group, data->user_requested_cpu_list, data->system_wide, /*root_metric=*/NULL, /*visited_metrics=*/NULL, table); } @@ -1146,7 +1143,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier const char *user_requested_cpu_list, bool system_wide, struct list_head *metric_list, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { LIST_HEAD(list); int ret; @@ -1166,8 +1163,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier * Iterate over all metrics seeing if metric matches either the * name or group. When it does add the metric to the list. */ - ret = pmu_events_table_for_each_event(table, metricgroup__add_metric_callback, - &data); + ret = pmu_metrics_table_for_each_metric(table, metricgroup__add_metric_callback, + &data); if (ret) goto out; @@ -1189,7 +1186,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier }, }; - pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); } /* End of pmu events. */ if (!has_match) @@ -1222,7 +1219,7 @@ out: static int metricgroup__add_metric_list(const char *list, bool metric_no_group, const char *user_requested_cpu_list, bool system_wide, struct list_head *metric_list, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { char *list_itr, *list_copy, *metric_name, *modifier; int ret, count = 0; @@ -1432,7 +1429,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, bool system_wide, struct perf_pmu *fake_pmu, struct rblist *metric_events_list, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); @@ -1580,7 +1577,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool system_wide, struct rblist *metric_events) { - const struct pmu_events_table *table = pmu_events_table__find(); + const struct pmu_metrics_table *table = pmu_metrics_table__find(); if (!table) return -EINVAL; @@ -1591,7 +1588,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, } int metricgroup__parse_groups_test(struct evlist *evlist, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1603,16 +1600,16 @@ int metricgroup__parse_groups_test(struct evlist *evlist, &perf_pmu__fake, metric_events, table); } -static int metricgroup__has_metric_callback(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, +static int metricgroup__has_metric_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, void *vdata) { const char *metric = vdata; - if (!pe->metric_expr) + if (!pm->metric_expr) return 0; - if (match_metric(pe->metric_name, metric)) + if (match_metric(pm->metric_name, metric)) return 1; return 0; @@ -1620,13 +1617,13 @@ 
static int metricgroup__has_metric_callback(const struct pmu_event *pe, bool metricgroup__has_metric(const char *metric) { - const struct pmu_events_table *table = pmu_events_table__find(); + const struct pmu_metrics_table *table = pmu_metrics_table__find(); if (!table) return false; - return pmu_events_table_for_each_event(table, metricgroup__has_metric_callback, - (void *)metric) ? true : false; + return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, + (void *)metric) ? true : false; } int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 0013cf582173..84030321a057 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -73,7 +73,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool system_wide, struct rblist *metric_events); int metricgroup__parse_groups_test(struct evlist *evlist, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, const char *str, bool metric_no_group, bool metric_no_merge, @@ -81,7 +81,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); +int arch_get_runtimeparam(const struct pmu_metric *pm); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 21cce83462b3..0336ff27c15f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1570,8 +1570,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; - evsel->metric_expr = info.metric_expr; - evsel->metric_name = info.metric_name; return 0; } diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index ac3227ba769c..b59ba825ddc9 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -193,8 +193,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", info->desc, /*long_desc=*/NULL, - /*encoding_desc=*/buf->buf, - /*metric_name=*/NULL, /*metric_expr=*/NULL); + /*encoding_desc=*/buf->buf); pfm_for_each_event_attr(j, info) { pfm_event_attr_info_t ainfo; @@ -224,8 +223,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", ainfo.desc, /*long_desc=*/NULL, - /*encoding_desc=*/buf->buf, - /*metric_name=*/NULL, /*metric_expr=*/NULL); + /*encoding_desc=*/buf->buf); } } } diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c index f51ccaac60ee..38628805a952 100644 --- a/tools/perf/util/pmu-hybrid.c +++ b/tools/perf/util/pmu-hybrid.c @@ -20,32 +20,15 @@ LIST_HEAD(perf_pmu__hybrid_pmus); bool perf_pmu__hybrid_mounted(const char *name) { - char path[PATH_MAX]; - const char *sysfs; - FILE *file; - int n, cpu; + int cpu; + char pmu_name[PATH_MAX]; + struct perf_pmu pmu = {.name = pmu_name}; if (strncmp(name, "cpu_", 4)) return false; - sysfs = sysfs__mountpoint(); - if (!sysfs) - return false; - - snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name); - if (!file_available(path)) - return false; - - file = fopen(path, "r"); - if 
(!file) - return false; - - n = fscanf(file, "%u", &cpu); - fclose(file); - if (n <= 0) - return false; - - return true; + strlcpy(pmu_name, name, sizeof(pmu_name)); + return perf_pmu__scan_file(&pmu, "cpus", "%u", &cpu) > 0; } struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2bdeb89352e7..c256b29defad 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -19,6 +19,7 @@ #include <regex.h> #include <perf/cpumap.h> #include <fnmatch.h> +#include <math.h> #include "debug.h" #include "evsel.h" #include "pmu.h" @@ -107,14 +108,10 @@ int perf_pmu__format_parse(char *dir, struct list_head *head) static int pmu_format(const char *name, struct list_head *format) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "format")) return -1; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); - if (!file_available(path)) return 0; @@ -283,10 +280,6 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old, perf_pmu_assign_str(old->name, "long_desc", &old->long_desc, &newalias->long_desc); perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic); - perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr, - &newalias->metric_expr); - perf_pmu_assign_str(old->name, "metric_name", &old->metric_name, - &newalias->metric_name); perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str); old->scale = newalias->scale; old->per_pkg = newalias->per_pkg; @@ -302,8 +295,6 @@ void perf_pmu_free_alias(struct perf_pmu_alias *newalias) zfree(&newalias->long_desc); zfree(&newalias->topic); zfree(&newalias->str); - zfree(&newalias->metric_expr); - zfree(&newalias->metric_name); zfree(&newalias->pmu_name); parse_events_terms__purge(&newalias->terms); free(newalias); @@ -340,16 +331,13 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, int num; char newval[256]; char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL, - *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL, - *pmu_name = NULL; + *deprecated = NULL, *pmu_name = NULL; if (pe) { long_desc = (char *)pe->long_desc; topic = (char *)pe->topic; unit = (char *)pe->unit; perpkg = (char *)pe->perpkg; - metric_expr = (char *)pe->metric_expr; - metric_name = (char *)pe->metric_name; deprecated = (char *)pe->deprecated; pmu_name = (char *)pe->pmu; } @@ -404,8 +392,6 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } - alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL; - alias->metric_name = metric_name ? strdup(metric_name): NULL; alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? 
strdup(desc) : NULL; @@ -513,14 +499,10 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) static int pmu_aliases(const char *name, struct list_head *head) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "events")) return -1; - snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/events", sysfs, name); - if (!file_available(path)) return 0; @@ -554,52 +536,16 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, return 0; } -/* - * Reading/parsing the default pmu type value, which should be - * located at: - * /sys/bus/event_source/devices/<dev>/type as sysfs attribute. - */ -static int pmu_type(const char *name, __u32 *type) -{ - char path[PATH_MAX]; - FILE *file; - int ret = 0; - const char *sysfs = sysfs__mountpoint(); - - if (!sysfs) - return -1; - - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); - - if (access(path, R_OK) < 0) - return -1; - - file = fopen(path, "r"); - if (!file) - return -EINVAL; - - if (1 != fscanf(file, "%u", type)) - ret = -1; - - fclose(file); - return ret; -} - /* Add all pmus in sysfs to pmu list: */ static void pmu_read_sysfs(void) { char path[PATH_MAX]; DIR *dir; struct dirent *dent; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path))) return; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); - dir = opendir(path); if (!dir) return; @@ -614,45 +560,29 @@ static void pmu_read_sysfs(void) closedir(dir); } -static struct perf_cpu_map *__pmu_cpumask(const char *path) -{ - FILE *file; - struct perf_cpu_map *cpus; - - file = fopen(path, "r"); - if (!file) - return NULL; - - cpus = perf_cpu_map__read(file); - fclose(file); - return cpus; -} - /* * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. 
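* Both file names are tried in that order below; the first one that
* parses into a CPU map is used.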
*/ -#define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" -#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" - static struct perf_cpu_map *pmu_cpumask(const char *name) { - char path[PATH_MAX]; struct perf_cpu_map *cpus; - const char *sysfs = sysfs__mountpoint(); const char *templates[] = { - CPUS_TEMPLATE_UNCORE, - CPUS_TEMPLATE_CPU, + "cpumask", + "cpus", NULL }; const char **template; + char pmu_name[PATH_MAX]; + struct perf_pmu pmu = {.name = pmu_name}; + FILE *file; - if (!sysfs) - return NULL; - + strlcpy(pmu_name, name, sizeof(pmu_name)); for (template = templates; *template; template++) { - snprintf(path, PATH_MAX, *template, sysfs, name); - cpus = __pmu_cpumask(path); + file = perf_pmu__open_file(&pmu, *template); + if (!file) + continue; + cpus = perf_cpu_map__read(file); if (cpus) return cpus; } @@ -663,13 +593,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name) static bool pmu_is_uncore(const char *name) { char path[PATH_MAX]; - const char *sysfs; if (perf_pmu__hybrid_mounted(name)) return false; - sysfs = sysfs__mountpoint(); - snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); + perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpumask"); return file_available(path); } @@ -678,9 +606,9 @@ static char *pmu_id(const char *name) char path[PATH_MAX], *str; size_t len; - snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name); + perf_pmu__pathname_scnprintf(path, sizeof(path), name, "identifier"); - if (sysfs__read_str(path, &str, &len) < 0) + if (filename__read_str(path, &str, &len) < 0) return NULL; str[len - 1] = 0; /* remove line feed */ @@ -696,14 +624,9 @@ static char *pmu_id(const char *name) static int is_arm_pmu_core(const char *name) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpus")) return 0; - - /* Look for cpu sysfs (specific to arm) */ - scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", - sysfs, name); return file_available(path); } @@ -729,7 +652,12 @@ char *perf_pmu__getcpuid(struct perf_pmu *pmu) __weak const struct pmu_events_table *pmu_events_table__find(void) { - return perf_pmu__find_table(NULL); + return perf_pmu__find_events_table(NULL); +} + +__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) +{ + return perf_pmu__find_metrics_table(NULL); } /* @@ -822,9 +750,6 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, struct pmu_add_cpu_aliases_map_data *data = vdata; const char *pname = pe->pmu ? 
pe->pmu : data->cpu_name; - if (!pe->name) - return 0; - if (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->name)) goto new_alias; @@ -860,7 +785,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { const struct pmu_events_table *table; - table = perf_pmu__find_table(pmu); + table = perf_pmu__find_events_table(pmu); if (!table) return; @@ -879,12 +804,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, struct pmu_sys_event_iter_data *idata = data; struct perf_pmu *pmu = idata->pmu; - if (!pe->name) { - if (pe->metric_group || pe->metric_name) - return 0; - return -EINVAL; - } - if (!pe->compat || !pe->pmu) return 0; @@ -931,16 +850,11 @@ pmu_find_alias_name(const char *name __maybe_unused) return NULL; } -static int pmu_max_precise(const char *name) +static int pmu_max_precise(struct perf_pmu *pmu) { - char path[PATH_MAX]; int max_precise = -1; - scnprintf(path, PATH_MAX, - "bus/event_source/devices/%s/caps/max_precise", - name); - - sysfs__read_int(path, &max_precise); + perf_pmu__scan_file(pmu, "caps/max_precise", "%d", &max_precise); return max_precise; } @@ -969,11 +883,8 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) return NULL; /* - * Check the type first to avoid unnecessary work. + * Check the aliases first to avoid unnecessary work. */ - if (pmu_type(name, &type)) - return NULL; - if (pmu_aliases(name, &aliases)) return NULL; @@ -983,9 +894,14 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->cpus = pmu_cpumask(name); pmu->name = strdup(name); + if (!pmu->name) goto err; + /* Read type, and ensure that type value is successfully assigned (return 1) */ + if (perf_pmu__scan_file(pmu, "type", "%u", &type) != 1) + goto err; + alias_name = pmu_find_alias_name(name); if (alias_name) { pmu->alias_name = strdup(alias_name); @@ -997,7 +913,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->max_precise = pmu_max_precise(name); + pmu->max_precise = pmu_max_precise(pmu); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); @@ -1469,8 +1385,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->unit = NULL; info->scale = 0.0; info->snapshot = false; - info->metric_expr = NULL; - info->metric_name = NULL; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -1486,8 +1400,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (alias->per_pkg) info->per_pkg = true; - info->metric_expr = alias->metric_expr; - info->metric_name = alias->metric_name; list_del_init(&term->list); parse_events_term__delete(term); @@ -1703,8 +1615,7 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) for (j = 0; j < len; j++) { const char *name, *alias = NULL, *scale_unit = NULL, *desc = NULL, *long_desc = NULL, - *encoding_desc = NULL, *topic = NULL, - *metric_name = NULL, *metric_expr = NULL; + *encoding_desc = NULL, *topic = NULL; bool deprecated = false; size_t buf_used; @@ -1742,8 +1653,6 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, "%s/%s/", aliases[j].pmu->name, aliases[j].event->str) + 1; - metric_name = aliases[j].event->metric_name; - metric_expr = aliases[j].event->metric_expr; deprecated = aliases[j].event->deprecated; } print_cb->print_event(print_state, @@ 
-1756,9 +1665,7 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) "Kernel PMU event", desc, long_desc, - encoding_desc, - metric_name, - metric_expr); + encoding_desc); } if (printed && pager_in_use()) printf("\n"); @@ -1783,19 +1690,14 @@ bool pmu_have_event(const char *pname, const char *name) return false; } -static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) +FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { char path[PATH_MAX]; - const char *sysfs; - sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name) || + !file_available(path)) return NULL; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name); - if (!file_available(path)) - return NULL; return fopen(path, "r"); } @@ -1816,6 +1718,16 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, return ret; } +bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name) +{ + char path[PATH_MAX]; + + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name)) + return false; + + return file_available(path); +} + static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) { struct perf_pmu_caps *caps = zalloc(sizeof(*caps)); @@ -1849,7 +1761,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) { struct stat st; char caps_path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); DIR *caps_dir; struct dirent *evt_ent; @@ -1858,12 +1769,9 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) pmu->nr_caps = 0; - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps")) return -1; - snprintf(caps_path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); - if (stat(caps_path, &st) < 0) { pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ @@ -1993,3 +1901,36 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, *ucpus_ptr = unmatched_cpus; return 0; } + +double __weak perf_pmu__cpu_slots_per_cycle(void) +{ + return NAN; +} + +int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) +{ + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + return scnprintf(pathname, size, "%s/bus/event_source/devices/", sysfs); +} + +/* + * Fill 'buf' with the path to a file or folder in 'pmu_name' in + * sysfs. For example if pmu_name = "cs_etm" and 'filename' = "format" + * then pathname will be filled with + * "/sys/bus/event_source/devices/cs_etm/format" + * + * Return 0 if the sysfs mountpoint couldn't be found or if no + * characters were written. 
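+ * A minimal usage sketch (the "cpu" PMU and its "caps" directory are
+ * only examples):
+ *
+ *	char path[PATH_MAX];
+ *	DIR *caps_dir;
+ *
+ *	if (perf_pmu__pathname_scnprintf(path, sizeof(path), "cpu", "caps"))
+ *		caps_dir = opendir(path);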
+ */ +int perf_pmu__pathname_scnprintf(char *buf, size_t size, + const char *pmu_name, const char *filename) +{ + char base_path[PATH_MAX]; + + if (!perf_pmu__event_source_devices_scnprintf(base_path, sizeof(base_path))) + return 0; + return scnprintf(buf, size, "%s%s/%s", base_path, pmu_name, filename); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 69ca0004f94f..6b770f17eb86 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -7,6 +7,7 @@ #include <linux/perf_event.h> #include <linux/list.h> #include <stdbool.h> +#include <stdio.h> #include "parse-events.h" #include "pmu-events/pmu-events.h" @@ -22,8 +23,6 @@ enum { }; #define PERF_PMU_FORMAT_BITS 64 -#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" -#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" #define MAX_PMU_NAME_LEN 128 struct perf_event_attr; @@ -133,8 +132,6 @@ extern struct perf_pmu perf_pmu__fake; struct perf_pmu_info { const char *unit; - const char *metric_expr; - const char *metric_name; double scale; bool per_pkg; bool snapshot; @@ -188,13 +185,6 @@ struct perf_pmu_alias { * default. */ bool deprecated; - /** - * @metric_expr: A metric expression associated with an event. Doing - * this makes little sense due to scale and unit applying to both. - */ - char *metric_expr; - /** @metric_name: A name for the metric. unit applying to both. */ - char *metric_name; /** @pmu_name: The name copied from struct perf_pmu. */ char *pmu_name; }; @@ -231,6 +221,8 @@ bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); +bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name); + int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); @@ -239,6 +231,7 @@ void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu, char *perf_pmu__getcpuid(struct perf_pmu *pmu); const struct pmu_events_table *pmu_events_table__find(void); +const struct pmu_metrics_table *pmu_metrics_table__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); @@ -259,4 +252,10 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, char *pmu_find_real_name(const char *name); char *pmu_find_alias_name(const char *name); +double perf_pmu__cpu_slots_per_cycle(void); +int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); +int perf_pmu__pathname_scnprintf(char *buf, size_t size, + const char *pmu_name, const char *filename); +FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name); + #endif /* __PMU_H */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 2646ae18d9f9..62e9ea7dcf40 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -101,9 +101,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb, void *print "Tracepoint event", /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } free(dir_path); free(evt_namelist); @@ -195,9 +193,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) "SDT event", /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); free(evt_name); } @@ -255,9 +251,7 @@ int print_hwcache_events(const struct 
print_callbacks *print_cb, void *print_sta event_type_descriptors[PERF_TYPE_HW_CACHE], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } strlist__delete(evt_name_list); return 0; @@ -277,9 +271,7 @@ void print_tool_events(const struct print_callbacks *print_cb, void *print_state "Tool event", /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } } @@ -331,9 +323,7 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta event_type_descriptors[type], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } strlist__delete(evt_name_list); } @@ -364,9 +354,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) event_type_descriptors[PERF_TYPE_RAW], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); print_cb->print_event(print_state, /*topic=*/NULL, @@ -378,9 +366,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) event_type_descriptors[PERF_TYPE_RAW], "(see 'man perf-list' on how to encode it)", /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); print_cb->print_event(print_state, /*topic=*/NULL, @@ -392,9 +378,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) event_type_descriptors[PERF_TYPE_BREAKPOINT], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); print_tracepoint_events(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index c237e53c4487..716dcf4b4859 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -16,8 +16,7 @@ struct print_callbacks { const char *scale_unit, bool deprecated, const char *event_type_desc, const char *desc, const char *long_desc, - const char *encoding_desc, - const char *metric_name, const char *metric_expr); + const char *encoding_desc); void (*print_metric)(void *print_state, const char *group, const char *name, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0c24bc7afbca..881d94f65a6b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -917,7 +917,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf); if (!dinfo) { if (need_dwarf) - return -ENOENT; + return -ENODATA; pr_debug("Could not open debuginfo. Try to use symbols.\n"); return 0; } @@ -956,7 +956,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, if (ntevs == 0) { /* No error but failed to find probe point. 
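* Debuginfo itself was opened successfully here, so it is the probe
* point that is missing; hence -ENODEV below rather than -ENOENT.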
*/ pr_warning("Probe point '%s' not found.\n", synthesize_perf_probe_point(&pev->point)); - return -ENOENT; + return -ENODEV; } else if (ntevs < 0) { /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 212031b97910..42e8b813d010 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -20,6 +20,7 @@ #include "stat.h" #include "metricgroup.h" #include "util/env.h" +#include "util/pmu.h" #include <internal/lib.h> #include "util.h" @@ -76,14 +77,7 @@ const char *perf_env__arch(struct perf_env *env __maybe_unused) } /* - * Add this one here not to drag util/stat-shadow.c - */ -void perf_stat__collect_metric_expr(struct evlist *evsel_list) -{ -} - -/* - * This one is needed not to drag the PMU bandwagon, jevents generated + * These ones are needed not to drag the PMU bandwagon, jevents generated * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so * far, for the perf python binding known usecases, revisit if this become @@ -94,6 +88,11 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused) return NULL; } +int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) +{ + return EOF; +} + /* * Add this one here not to drag util/metricgroup.c */ @@ -442,10 +441,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 60ec79d4eea4..33b08e0ac746 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -92,7 +92,10 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; - u16 p_stage_cyc; + union { + u16 p_stage_cyc; + u16 retire_lat; + }; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index c097b7934fd4..83fd2fd0ba16 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -393,10 +393,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index e930f5f1f36d..2c2697c5d025 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -994,10 +994,8 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset 
+= field->offset + field->size; -#endif } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7c021c6cedb9..749d5b5c135b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1180,7 +1180,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *e = &entries[i]; if (!callstack) { - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n", + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n", i, e->from, e->to, (unsigned short)e->flags.cycles, e->flags.mispred ? "M" : " ", @@ -1188,7 +1188,8 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) e->flags.abort ? "A" : " ", e->flags.in_tx ? "T" : " ", (unsigned)e->flags.reserved, - get_branch_type(e)); + get_branch_type(e), + e->flags.spec ? branch_spec_desc(e->flags.spec) : ""); } else { if (i == 0) { printf("..... %2"PRIu64": %016" PRIx64 "\n" @@ -1699,8 +1700,13 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_AUXTRACE_INFO: return tool->auxtrace_info(session, event); case PERF_RECORD_AUXTRACE: - /* setup for reading amidst mmap */ - lseek(fd, file_offset + event->header.size, SEEK_SET); + /* + * Setup for reading amidst mmap, but only when we + * are in 'file' mode. The 'pipe' fd is in proper + * place already. + */ + if (!perf_data__is_pipe(session->data)) + lseek(fd, file_offset + event->header.size, SEEK_SET); return tool->auxtrace(session, event); case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 37662cdec5ee..093a0c8b2e3d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -28,6 +28,7 @@ #include "time-utils.h" #include "cgroup.h" #include "machine.h" +#include "trace-event.h" #include <linux/kernel.h> #include <linux/string.h> @@ -53,6 +54,13 @@ static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"}; /* + * Some architectures have Adjacent Cacheline Prefetch feature, which + * behaves like the cacheline size is doubled. Enable this flag to + * check things in double cacheline granularity. 
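+ * For example, with 64-byte cachelines, two addresses that are
+ * 64 bytes apart are treated as one 128-byte line when this is set.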
+ */ +bool chk_double_cl; + +/* * Replaces all occurrences of a char used with the: * * -t, --field-separator @@ -1499,8 +1507,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) addr: /* al_addr does all the right addr - start + offset calculations */ - l = cl_address(left->mem_info->daddr.al_addr); - r = cl_address(right->mem_info->daddr.al_addr); + l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl); + r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl); if (l > r) return -1; if (l < r) return 1; @@ -1519,7 +1527,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if (he->mem_info) { struct map *map = he->mem_info->daddr.ms.map; - addr = cl_address(he->mem_info->daddr.al_addr); + addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl); ms = &he->mem_info->daddr.ms; /* print [s] for shared data mmaps */ @@ -2132,6 +2140,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), DIM(SORT_ADDR, "addr", sort_addr), + DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), + DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), }; #undef DIM @@ -2667,10 +2677,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif /* record max width for output */ if (size > hde->dynamic_len) hde->dynamic_len = size; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 921715e6aec4..d79a100e5999 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -35,6 +35,7 @@ extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; extern const char default_mem_sort_order[]; +extern bool chk_double_cl; struct res_sample { u64 time; @@ -237,6 +238,8 @@ enum sort_type { SORT_LOCAL_PIPELINE_STAGE_CYC, SORT_GLOBAL_PIPELINE_STAGE_CYC, SORT_ADDR, + SORT_LOCAL_RETIRE_LAT, + SORT_GLOBAL_RETIRE_LAT, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 8bd8b0142630..1b5cb20efd23 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -787,6 +787,51 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun uniquify_event_name(counter); } +/** + * should_skip_zero_count() - Check if the event should print 0 values. + * @config: The perf stat configuration (including aggregation mode). + * @counter: The evsel with its associated cpumap. + * @id: The aggregation id that is being queried. + * + * Due to mismatch between the event cpumap or thread-map and the + * aggregation mode, sometimes it'd iterate the counter with the map + * which does not contain any values. + * + * For example, uncore events have dedicated CPUs to manage them, + * result for other CPUs should be zero and skipped. + * + * Return: %true if the value should NOT be printed, %false if the value + * needs to be printed like "<not counted>" or "<not supported>". 
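+ *
+ * E.g. with --per-socket aggregation, an uncore counter only has a
+ * value on the socket that covers its PMU cpumask; the other sockets
+ * are skipped rather than printed as 0.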
+ */ +static bool should_skip_zero_counter(struct perf_stat_config *config, + struct evsel *counter, + const struct aggr_cpu_id *id) +{ + struct perf_cpu cpu; + int idx; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise it will have too many 0 output. + */ + if (config->aggr_mode == AGGR_THREAD && config->system_wide) + return true; + /* + * Skip value 0 when it's an uncore event and the given aggr id + * does not belong to the PMU cpumask. + */ + if (!counter->pmu || !counter->pmu->is_uncore) + return false; + + perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) { + struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu); + + if (aggr_cpu_id__equal(id, &own_id)) + return false; + } + return true; +} + static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, struct outstate *os) @@ -814,11 +859,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - /* - * Skip value 0 when enabling --per-thread globally, otherwise it will - * have too many 0 output. - */ - if (val == 0 && config->aggr_mode == AGGR_THREAD && config->system_wide) + if (val == 0 && should_skip_zero_counter(config, counter, &id)) return; if (!metric_only) { diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index cadb2df23c87..806b32156459 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -311,7 +311,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; - } else if (counter->metric_leader) { + } else if (counter->metric_leader && !counter->merged_stat) { v = saved_value_lookup(counter->metric_leader, map_idx, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; @@ -346,114 +346,6 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static struct evsel *perf_stat__find_event(struct evlist *evsel_list, - const char *name) -{ - struct evsel *c2; - - evlist__for_each_entry (evsel_list, c2) { - if (!strcasecmp(c2->name, name) && !c2->collect_stat) - return c2; - } - return NULL; -} - -/* Mark MetricExpr target events and link events using them to them. 
*/ -void perf_stat__collect_metric_expr(struct evlist *evsel_list) -{ - struct evsel *counter, *leader, **metric_events, *oc; - bool found; - struct expr_parse_ctx *ctx; - struct hashmap_entry *cur; - size_t bkt; - int i; - - ctx = expr__ctx_new(); - if (!ctx) { - pr_debug("expr__ctx_new failed"); - return; - } - evlist__for_each_entry(evsel_list, counter) { - bool invalid = false; - - leader = evsel__leader(counter); - if (!counter->metric_expr) - continue; - - expr__ctx_clear(ctx); - metric_events = counter->metric_events; - if (!metric_events) { - if (expr__find_ids(counter->metric_expr, - counter->name, - ctx) < 0) - continue; - - metric_events = calloc(sizeof(struct evsel *), - hashmap__size(ctx->ids) + 1); - if (!metric_events) { - expr__ctx_free(ctx); - return; - } - counter->metric_events = metric_events; - } - - i = 0; - hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *metric_name = cur->pkey; - - found = false; - if (leader) { - /* Search in group */ - for_each_group_member (oc, leader) { - if (!strcasecmp(oc->name, - metric_name) && - !oc->collect_stat) { - found = true; - break; - } - } - } - if (!found) { - /* Search ignoring groups */ - oc = perf_stat__find_event(evsel_list, - metric_name); - } - if (!oc) { - /* Deduping one is good enough to handle duplicated PMUs. */ - static char *printed; - - /* - * Adding events automatically would be difficult, because - * it would risk creating groups that are not schedulable. - * perf stat doesn't understand all the scheduling constraints - * of events. So we ask the user instead to add the missing - * events. - */ - if (!printed || - strcasecmp(printed, metric_name)) { - fprintf(stderr, - "Add %s event to groups to get metric expression for %s\n", - metric_name, - counter->name); - free(printed); - printed = strdup(metric_name); - } - invalid = true; - continue; - } - metric_events[i++] = oc; - oc->collect_stat = true; - } - metric_events[i] = NULL; - if (invalid) { - free(metric_events); - counter->metric_events = NULL; - counter->metric_expr = NULL; - } - } - expr__ctx_free(ctx); -} - static double runtime_stat_avg(struct runtime_stat *st, enum stat_type type, int map_idx, struct runtime_stat_data *rsd) @@ -1299,10 +1191,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, color = NULL; print_metric(config, ctxp, color, "%8.1f%%", "Core Bound", core_bound * 100.); - } else if (evsel->metric_expr) { - generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, - evsel->name, evsel->metric_name, NULL, 1, - map_idx, out, st); } else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) { char unit = ' '; char unit_buf[10] = "/sec"; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 499c3bf81333..b1c29156c560 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -257,7 +257,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st); -void perf_stat__collect_metric_expr(struct evlist *); int evlist__alloc_stats(struct perf_stat_config *config, struct evlist *evlist, bool alloc_raw); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 96767d1b3f1c..41882ae8452e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -323,15 +323,325 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) return demangled; } -#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ - for 
(idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) +struct rel_info { + u32 nr_entries; + u32 *sorted; + bool is_rela; + Elf_Data *reldata; + GElf_Rela rela; + GElf_Rel rel; +}; + +static u32 get_rel_symidx(struct rel_info *ri, u32 idx) +{ + idx = ri->sorted ? ri->sorted[idx] : idx; + if (ri->is_rela) { + gelf_getrela(ri->reldata, idx, &ri->rela); + return GELF_R_SYM(ri->rela.r_info); + } + gelf_getrel(ri->reldata, idx, &ri->rel); + return GELF_R_SYM(ri->rel.r_info); +} -#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) +static u64 get_rel_offset(struct rel_info *ri, u32 x) +{ + if (ri->is_rela) { + GElf_Rela rela; + + gelf_getrela(ri->reldata, x, &rela); + return rela.r_offset; + } else { + GElf_Rel rel; + + gelf_getrel(ri->reldata, x, &rel); + return rel.r_offset; + } +} + +static int rel_cmp(const void *a, const void *b, void *r) +{ + struct rel_info *ri = r; + u64 a_offset = get_rel_offset(ri, *(const u32 *)a); + u64 b_offset = get_rel_offset(ri, *(const u32 *)b); + + return a_offset < b_offset ? -1 : (a_offset > b_offset ? 1 : 0); +} + +static int sort_rel(struct rel_info *ri) +{ + size_t sz = sizeof(ri->sorted[0]); + u32 i; + + ri->sorted = calloc(ri->nr_entries, sz); + if (!ri->sorted) + return -1; + for (i = 0; i < ri->nr_entries; i++) + ri->sorted[i] = i; + qsort_r(ri->sorted, ri->nr_entries, sz, rel_cmp, ri); + return 0; +} + +/* + * For x86_64, the GNU linker is putting IFUNC information in the relocation + * addend. + */ +static bool addend_may_be_ifunc(GElf_Ehdr *ehdr, struct rel_info *ri) +{ + return ehdr->e_machine == EM_X86_64 && ri->is_rela && + GELF_R_TYPE(ri->rela.r_info) == R_X86_64_IRELATIVE; +} + +static bool get_ifunc_name(Elf *elf, struct dso *dso, GElf_Ehdr *ehdr, + struct rel_info *ri, char *buf, size_t buf_sz) +{ + u64 addr = ri->rela.r_addend; + struct symbol *sym; + GElf_Phdr phdr; + + if (!addend_may_be_ifunc(ehdr, ri)) + return false; + + if (elf_read_program_header(elf, addr, &phdr)) + return false; + + addr -= phdr.p_vaddr - phdr.p_offset; + + sym = dso__find_symbol_nocache(dso, addr); + + /* Expecting the address to be an IFUNC or IFUNC alias */ + if (!sym || sym->start != addr || (sym->type != STT_GNU_IFUNC && !sym->ifunc_alias)) + return false; + + snprintf(buf, buf_sz, "%s@plt", sym->name); + + return true; +} + +static void exit_rel(struct rel_info *ri) +{ + free(ri->sorted); +} + +static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, + u64 *plt_header_size, u64 *plt_entry_size) +{ + switch (ehdr->e_machine) { + case EM_ARM: + *plt_header_size = 20; + *plt_entry_size = 12; + return true; + case EM_AARCH64: + *plt_header_size = 32; + *plt_entry_size = 16; + return true; + case EM_SPARC: + *plt_header_size = 48; + *plt_entry_size = 12; + return true; + case EM_SPARCV9: + *plt_header_size = 128; + *plt_entry_size = 32; + return true; + case EM_386: + case EM_X86_64: + *plt_entry_size = shdr_plt->sh_entsize; + /* Size is 8 or 16, if not, assume alignment indicates size */ + if (*plt_entry_size != 8 && *plt_entry_size != 16) + *plt_entry_size = shdr_plt->sh_addralign == 8 ? 
8 : 16; + *plt_header_size = *plt_entry_size; + break; + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ + *plt_header_size = shdr_plt->sh_entsize; + *plt_entry_size = shdr_plt->sh_entsize; + break; + } + if (*plt_entry_size) + return true; + pr_debug("Missing PLT entry size for %s\n", dso->long_name); + return false; +} + +static bool machine_is_x86(GElf_Half e_machine) +{ + return e_machine == EM_386 || e_machine == EM_X86_64; +} + +struct rela_dyn { + GElf_Addr offset; + u32 sym_idx; +}; + +struct rela_dyn_info { + struct dso *dso; + Elf_Data *plt_got_data; + u32 nr_entries; + struct rela_dyn *sorted; + Elf_Data *dynsym_data; + Elf_Data *dynstr_data; + Elf_Data *rela_dyn_data; +}; + +static void exit_rela_dyn(struct rela_dyn_info *di) +{ + free(di->sorted); +} + +static int cmp_offset(const void *a, const void *b) +{ + const struct rela_dyn *va = a; + const struct rela_dyn *vb = b; + + return va->offset < vb->offset ? -1 : (va->offset > vb->offset ? 1 : 0); +} + +static int sort_rela_dyn(struct rela_dyn_info *di) +{ + u32 i, n; + + di->sorted = calloc(di->nr_entries, sizeof(di->sorted[0])); + if (!di->sorted) + return -1; + + /* Get data for sorting: the offset and symbol index */ + for (i = 0, n = 0; i < di->nr_entries; i++) { + GElf_Rela rela; + u32 sym_idx; + + gelf_getrela(di->rela_dyn_data, i, &rela); + sym_idx = GELF_R_SYM(rela.r_info); + if (sym_idx) { + di->sorted[n].sym_idx = sym_idx; + di->sorted[n].offset = rela.r_offset; + n += 1; + } + } + + /* Sort by offset */ + di->nr_entries = n; + qsort(di->sorted, n, sizeof(di->sorted[0]), cmp_offset); + + return 0; +} + +static void get_rela_dyn_info(Elf *elf, GElf_Ehdr *ehdr, struct rela_dyn_info *di, Elf_Scn *scn) +{ + GElf_Shdr rela_dyn_shdr; + GElf_Shdr shdr; + + di->plt_got_data = elf_getdata(scn, NULL); + + scn = elf_section_by_name(elf, ehdr, &rela_dyn_shdr, ".rela.dyn", NULL); + if (!scn || !rela_dyn_shdr.sh_link || !rela_dyn_shdr.sh_entsize) + return; + + di->nr_entries = rela_dyn_shdr.sh_size / rela_dyn_shdr.sh_entsize; + di->rela_dyn_data = elf_getdata(scn, NULL); + + scn = elf_getscn(elf, rela_dyn_shdr.sh_link); + if (!scn || !gelf_getshdr(scn, &shdr) || !shdr.sh_link) + return; + + di->dynsym_data = elf_getdata(scn, NULL); + di->dynstr_data = elf_getdata(elf_getscn(elf, shdr.sh_link), NULL); + + if (!di->plt_got_data || !di->dynstr_data || !di->dynsym_data || !di->rela_dyn_data) + return; + + /* Sort into offset order */ + sort_rela_dyn(di); +} + +/* Get instruction displacement from a plt entry for x86_64 */ +static u32 get_x86_64_plt_disp(const u8 *p) +{ + u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa}; + int n = 0; + + /* Skip endbr64 */ + if (!memcmp(p, endbr64, sizeof(endbr64))) + n += sizeof(endbr64); + /* Skip bnd prefix */ + if (p[n] == 0xf2) + n += 1; + /* jmp with 4-byte displacement */ + if (p[n] == 0xff && p[n + 1] == 0x25) { + n += 2; + /* Also add offset from start of entry to end of instruction */ + return n + 4 + le32toh(*(const u32 *)(p + n)); + } + return 0; +} + +static bool get_plt_got_name(GElf_Shdr *shdr, size_t i, + struct rela_dyn_info *di, + char *buf, size_t buf_sz) +{ + struct rela_dyn vi, *vr; + const char *sym_name; + char *demangled; + GElf_Sym sym; + u32 disp; + + if (!di->sorted) + return false; + + disp = get_x86_64_plt_disp(di->plt_got_data->d_buf + i); + if (!disp) + return false; + + /* Compute target offset of the .plt.got entry */ + vi.offset = shdr->sh_offset + di->plt_got_data->d_off + i + disp; + + /* Find that offset in .rela.dyn (sorted by offset) */ + 
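+	/* A NULL result means no .rela.dyn relocation targets this entry */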
vr = bsearch(&vi, di->sorted, di->nr_entries, sizeof(di->sorted[0]), cmp_offset); + if (!vr) + return false; + + /* Get the associated symbol */ + gelf_getsym(di->dynsym_data, vr->sym_idx, &sym); + sym_name = elf_sym__name(&sym, di->dynstr_data); + demangled = demangle_sym(di->dso, 0, sym_name); + if (demangled != NULL) + sym_name = demangled; + + snprintf(buf, buf_sz, "%s@plt", sym_name); + + free(demangled); + + return *sym_name; +} + +static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf, + GElf_Ehdr *ehdr, + char *buf, size_t buf_sz) +{ + struct rela_dyn_info di = { .dso = dso }; + struct symbol *sym; + GElf_Shdr shdr; + Elf_Scn *scn; + int err = -1; + size_t i; + + scn = elf_section_by_name(elf, ehdr, &shdr, ".plt.got", NULL); + if (!scn || !shdr.sh_entsize) + return 0; + + if (ehdr->e_machine == EM_X86_64) + get_rela_dyn_info(elf, ehdr, &di, scn); + + for (i = 0; i < shdr.sh_size; i += shdr.sh_entsize) { + if (!get_plt_got_name(&shdr, i, &di, buf, buf_sz)) + snprintf(buf, buf_sz, "offset_%#" PRIx64 "@plt", (u64)shdr.sh_offset + i); + sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf); + if (!sym) + goto out; + symbols__insert(&dso->symbols, sym); + } + err = 0; +out: + exit_rela_dyn(&di); + return err; +} /* * We need to check if we have a .dynsym, so that we can handle the @@ -342,56 +652,104 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) */ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) { - uint32_t nr_rel_entries, idx; + uint32_t idx; GElf_Sym sym; u64 plt_offset, plt_header_size, plt_entry_size; - GElf_Shdr shdr_plt; - struct symbol *f; + GElf_Shdr shdr_plt, plt_sec_shdr; + struct symbol *f, *plt_sym; GElf_Shdr shdr_rel_plt, shdr_dynsym; - Elf_Data *reldata, *syms, *symstrs; + Elf_Data *syms, *symstrs; Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; - size_t dynsym_idx; GElf_Ehdr ehdr; char sympltname[1024]; Elf *elf; - int nr = 0, symidx, err = 0; - - if (!ss->dynsym) - return 0; + int nr = 0, err = -1; + struct rel_info ri = { .is_rela = false }; + bool lazy_plt; elf = ss->elf; ehdr = ss->ehdr; - scn_dynsym = ss->dynsym; - shdr_dynsym = ss->dynshdr; - dynsym_idx = ss->dynsym_idx; + if (!elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL)) + return 0; + + /* + * A symbol from a previous section (e.g. .init) can have been expanded + * by symbols__fixup_end() to overlap .plt. Truncate it before adding + * a symbol for .plt header. 
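+ * Only a symbol that actually straddles the start of .plt is
+ * truncated; others are left untouched.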
+ */ + f = dso__find_symbol_nocache(dso, shdr_plt.sh_offset); + if (f && f->start < shdr_plt.sh_offset && f->end > shdr_plt.sh_offset) + f->end = shdr_plt.sh_offset; + + if (!get_plt_sizes(dso, &ehdr, &shdr_plt, &plt_header_size, &plt_entry_size)) + return 0; + + /* Add a symbol for .plt header */ + plt_sym = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt"); + if (!plt_sym) + goto out_elf_end; + symbols__insert(&dso->symbols, plt_sym); - if (scn_dynsym == NULL) + /* Only x86 has .plt.got */ + if (machine_is_x86(ehdr.e_machine) && + dso__synthesize_plt_got_symbols(dso, elf, &ehdr, sympltname, sizeof(sympltname))) goto out_elf_end; + /* Only x86 has .plt.sec */ + if (machine_is_x86(ehdr.e_machine) && + elf_section_by_name(elf, &ehdr, &plt_sec_shdr, ".plt.sec", NULL)) { + if (!get_plt_sizes(dso, &ehdr, &plt_sec_shdr, &plt_header_size, &plt_entry_size)) + return 0; + /* Extend .plt symbol to entire .plt */ + plt_sym->end = plt_sym->start + shdr_plt.sh_size; + /* Use .plt.sec offset */ + plt_offset = plt_sec_shdr.sh_offset; + lazy_plt = false; + } else { + plt_offset = shdr_plt.sh_offset; + lazy_plt = true; + } + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rela.plt", NULL); if (scn_plt_rel == NULL) { scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rel.plt", NULL); if (scn_plt_rel == NULL) - goto out_elf_end; + return 0; } - err = -1; + if (shdr_rel_plt.sh_type != SHT_RELA && + shdr_rel_plt.sh_type != SHT_REL) + return 0; - if (shdr_rel_plt.sh_link != dynsym_idx) - goto out_elf_end; + if (!shdr_rel_plt.sh_link) + return 0; - if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) + if (shdr_rel_plt.sh_link == ss->dynsym_idx) { + scn_dynsym = ss->dynsym; + shdr_dynsym = ss->dynshdr; + } else if (shdr_rel_plt.sh_link == ss->symtab_idx) { + /* + * A static executable can have a .plt due to IFUNCs, in which + * case .symtab is used not .dynsym. + */ + scn_dynsym = ss->symtab; + shdr_dynsym = ss->symshdr; + } else { goto out_elf_end; + } + + if (!scn_dynsym) + return 0; /* * Fetch the relocation section to find the idxes to the GOT * and the symbols in the .dynsym they refer to. */ - reldata = elf_getdata(scn_plt_rel, NULL); - if (reldata == NULL) + ri.reldata = elf_getdata(scn_plt_rel, NULL); + if (!ri.reldata) goto out_elf_end; syms = elf_getdata(scn_dynsym, NULL); @@ -409,93 +767,57 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) if (symstrs->d_size == 0) goto out_elf_end; - nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - plt_offset = shdr_plt.sh_offset; - switch (ehdr.e_machine) { - case EM_ARM: - plt_header_size = 20; - plt_entry_size = 12; - break; + ri.nr_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - case EM_AARCH64: - plt_header_size = 32; - plt_entry_size = 16; - break; + ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA; - case EM_SPARC: - plt_header_size = 48; - plt_entry_size = 12; - break; - - case EM_SPARCV9: - plt_header_size = 128; - plt_entry_size = 32; - break; - - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ - plt_header_size = shdr_plt.sh_entsize; - plt_entry_size = shdr_plt.sh_entsize; - break; + if (lazy_plt) { + /* + * Assume a .plt with the same number of entries as the number + * of relocation entries is not lazy and does not have a header. 
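In numbers: with, say, three .rela.plt entries and 16-byte slots, a 0x40-byte .plt has a resolver header slot while a 0x30-byte one is headerless, in which case the .plt header symbol inserted earlier is dropped again. A short sketch of that test with made-up values (plt_has_header is a hypothetical name):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool plt_has_header(uint64_t nr_rel, uint64_t entry_sz, uint64_t plt_sz)
{
	/* A lazy .plt reserves one extra slot for the resolver header. */
	return nr_rel * entry_sz != plt_sz;
}

int main(void)
{
	printf("0x40 bytes, 3 entries: header=%d\n", plt_has_header(3, 16, 0x40)); /* 1 */
	printf("0x30 bytes, 3 entries: header=%d\n", plt_has_header(3, 16, 0x30)); /* 0 */
	return 0;
}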
+ */ + if (ri.nr_entries * plt_entry_size == shdr_plt.sh_size) + dso__delete_symbol(dso, plt_sym); + else + plt_offset += plt_header_size; } - plt_offset += plt_header_size; - - if (shdr_rel_plt.sh_type == SHT_RELA) { - GElf_Rela pos_mem, *pos; - elf_section__for_each_rela(reldata, pos, pos_mem, idx, - nr_rel_entries) { - const char *elf_name = NULL; - char *demangled = NULL; - symidx = GELF_R_SYM(pos->r_info); - gelf_getsym(syms, symidx, &sym); + /* + * x86 doesn't insert IFUNC relocations in .plt order, so sort to get + * back in order. + */ + if (machine_is_x86(ehdr.e_machine) && sort_rel(&ri)) + goto out_elf_end; - elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); - if (demangled != NULL) - elf_name = demangled; - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_name); - free(demangled); + for (idx = 0; idx < ri.nr_entries; idx++) { + const char *elf_name = NULL; + char *demangled = NULL; - f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, STT_FUNC, sympltname); - if (!f) - goto out_elf_end; + gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); - plt_offset += plt_entry_size; - symbols__insert(&dso->symbols, f); - ++nr; - } - } else if (shdr_rel_plt.sh_type == SHT_REL) { - GElf_Rel pos_mem, *pos; - elf_section__for_each_rel(reldata, pos, pos_mem, idx, - nr_rel_entries) { - const char *elf_name = NULL; - char *demangled = NULL; - symidx = GELF_R_SYM(pos->r_info); - gelf_getsym(syms, symidx, &sym); - - elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); - if (demangled != NULL) - elf_name = demangled; + elf_name = elf_sym__name(&sym, symstrs); + demangled = demangle_sym(dso, 0, elf_name); + if (demangled) + elf_name = demangled; + if (*elf_name) + snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); + else if (!get_ifunc_name(elf, dso, &ehdr, &ri, sympltname, sizeof(sympltname))) snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_name); - free(demangled); + "offset_%#" PRIx64 "@plt", plt_offset); + free(demangled); - f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, STT_FUNC, sympltname); - if (!f) - goto out_elf_end; + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, STT_FUNC, sympltname); + if (!f) + goto out_elf_end; - plt_offset += plt_entry_size; - symbols__insert(&dso->symbols, f); - ++nr; - } + plt_offset += plt_entry_size; + symbols__insert(&dso->symbols, f); + ++nr; } err = 0; out_elf_end: + exit_rel(&ri); if (err == 0) return nr; pr_debug("%s: problems reading %s PLT info.\n", @@ -946,8 +1268,9 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64); + ss->symtab_idx = 0; ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab", - NULL); + &ss->symtab_idx); if (ss->symshdr.sh_type != SHT_SYMTAB) ss->symtab = NULL; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a3a165ae933a..317c0706852f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -201,10 +201,14 @@ again: continue; if (choose_best_symbol(curr, next) == SYMBOL_A) { + if (next->type == STT_GNU_IFUNC) + curr->ifunc_alias = true; rb_erase_cached(&next->rb_node, symbols); symbol__delete(next); goto again; } else { + if (curr->type == STT_GNU_IFUNC) + next->ifunc_alias = true; nd = rb_next(&curr->rb_node); rb_erase_cached(&curr->rb_node, symbols); symbol__delete(curr); @@ -556,6 +560,11 @@ struct symbol *dso__find_symbol(struct dso *dso, u64 addr) return dso->last_find_result.symbol; } 
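dso__find_symbol() memoizes its last answer in dso->last_find_result, which is exactly why the _nocache variant that follows exists: the PLT code above looks a symbol up while it is still mutating the symbol tree, and a lookup that populated the cache could leave it pointing at a symbol that is subsequently truncated or deleted. A self-contained miniature of the hazard (struct sym, cached_find and slow_find are illustrative, not the perf API):

#include <stdint.h>
#include <stdio.h>

struct sym { uint64_t start, end; const char *name; };

static struct {
	uint64_t addr;
	struct sym *sym;
	int valid;
} last;			/* stands in for dso->last_find_result */

static struct sym *slow_find(struct sym *tab, int n, uint64_t addr)
{
	for (int i = 0; i < n; i++)
		if (addr >= tab[i].start && addr < tab[i].end)
			return &tab[i];
	return NULL;
}

static struct sym *cached_find(struct sym *tab, int n, uint64_t addr)
{
	if (!last.valid || last.addr != addr) {
		last.addr = addr;
		last.sym = slow_find(tab, n, addr);
		last.valid = 1;
	}
	return last.sym;	/* stale if tab was edited after caching */
}

int main(void)
{
	struct sym tab[] = {
		{ 0x1000, 0x1020, "init_fn" },
		{ 0x1020, 0x1040, "other_fn" },
	};

	printf("%s\n", cached_find(tab, 2, 0x1010)->name);	/* cached now */
	/* Editing tab here would leave the cache stale; the uncached walk
	 * is what stays safe while the table is under construction: */
	printf("%s\n", slow_find(tab, 2, 0x1030)->name);
	return 0;
}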
+struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr) +{ + return symbols__find(&dso->symbols, addr); +} + struct symbol *dso__first_symbol(struct dso *dso) { return symbols__first(&dso->symbols); @@ -1368,10 +1377,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map, /* Find the kernel map using the '_stext' symbol */ if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) { + u64 replacement_size = 0; + list_for_each_entry(new_map, &md.maps, node) { - if (stext >= new_map->start && stext < new_map->end) { + u64 new_size = new_map->end - new_map->start; + + if (!(stext >= new_map->start && stext < new_map->end)) + continue; + + /* + * On some architectures, ARM64 for example, the kernel + * text can get allocated inside of the vmalloc segment. + * Select the smallest matching segment, in case stext + * falls within more than one in the list. + */ + if (!replacement_map || new_size < replacement_size) { replacement_map = new_map; - break; + replacement_size = new_size; } } } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f735108c4d4e..7558735543c2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -64,6 +64,8 @@ struct symbol { u8 inlined:1; /** Has symbol__annotate2 been performed. */ u8 annotate2:1; + /** Symbol is an alias of an STT_GNU_IFUNC */ + u8 ifunc_alias:1; /** Architecture specific. Unused except on PPC where it holds st_other. */ u8 arch_sym; /** The name of length namelen associated with the symbol. */ @@ -148,6 +150,7 @@ void dso__delete_symbol(struct dso *dso, struct symbol *sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); +struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); struct symbol *symbol__next_by_name(struct symbol *sym); diff --git a/tools/perf/util/symsrc.h b/tools/perf/util/symsrc.h index 2665b4bde751..edf82028c9e6 100644 --- a/tools/perf/util/symsrc.h +++ b/tools/perf/util/symsrc.h @@ -26,6 +26,7 @@ struct symsrc { GElf_Shdr opdshdr; Elf_Scn *symtab; + size_t symtab_idx; GElf_Shdr symshdr; Elf_Scn *dynsym; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3ab6a92b1a6d..9ab9308ee80c 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2219,8 +2219,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); - ev.build_id.size = pos->bid.size; + ev.build_id.size = min(pos->bid.size, sizeof(pos->bid.data)); + memcpy(&ev.build_id.build_id, pos->bid.data, ev.build_id.size); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; ev.build_id.pid = machine->pid; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 9b3cd79cca12..a0cff184b1cd 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -21,6 +21,9 @@ struct trace_event { struct tep_plugin_list *plugin_list; }; +/* Computes a version number comparable with LIBTRACEEVENT_VERSION from Makefile.config. 
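The packing is plain base-255 positional arithmetic, so comparisons order versions correctly as long as each component stays below 255, matching how Makefile.config derives LIBTRACEEVENT_VERSION. A worked example (MK_VER is a local stand-in for the macro defined just below):

#include <stdio.h>

#define MK_VER(a, b, c) ((a) * 255 * 255 + (b) * 255 + (c))

int main(void)
{
	printf("1.5.0 packs to %d\n", MK_VER(1, 5, 0));	/* 66300 */
	printf("1.7.2 >= 1.5.0: %d\n", MK_VER(1, 7, 2) >= MK_VER(1, 5, 0)); /* 1 */
	printf("1.3.0 >= 1.5.0: %d\n", MK_VER(1, 3, 0) >= MK_VER(1, 5, 0)); /* 0 */
	return 0;
}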
*/ +#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) + typedef char *(tep_func_resolver_t)(void *priv, unsigned long long *addrp, char **modp); @@ -137,4 +140,20 @@ int common_lock_depth(struct scripting_context *context); #define SAMPLE_FLAGS_BUF_SIZE 64 int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); +#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) +#include <traceevent/event-parse.h> + +static inline bool tep_field_is_relative(unsigned long flags) +{ + return (flags & TEP_FIELD_IS_RELATIVE) != 0; +} +#else +#include <linux/compiler.h> + +static inline bool tep_field_is_relative(unsigned long flags __maybe_unused) +{ + return false; +} +#endif + #endif /* _PERF_UTIL_TRACE_EVENT_H */
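The payoff of the shim is at the call sites: they test the flag unconditionally and the version dispatch happens at compile time, so with a pre-1.5.0 libtraceevent the helper constant-folds to false and the relative-offset branch compiles away. A hypothetical caller (the flag value here is illustrative; the real TEP_FIELD_IS_RELATIVE comes from event-parse.h, and the offset adjustment mirrors how perf's users of this helper rebase __rel_loc fields):

#include <stdbool.h>
#include <stdio.h>

#define TEP_FIELD_IS_RELATIVE 0x100	/* illustrative value only */

static inline bool tep_field_is_relative(unsigned long flags)
{
	return (flags & TEP_FIELD_IS_RELATIVE) != 0;
}

int main(void)
{
	unsigned long flags = TEP_FIELD_IS_RELATIVE;
	unsigned long field_offset = 0x10, field_size = 0x8, data_offset = 0x4;

	/* A relative field's stored offset is based at the end of the field
	 * itself rather than at the start of the record: */
	if (tep_field_is_relative(flags))
		data_offset += field_offset + field_size;

	printf("data at record offset %#lx\n", data_offset);	/* 0x1c */
	return 0;
}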