summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build5
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c6
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c4
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h1
-rw-r--r--tools/perf/util/auxtrace.c12
-rw-r--r--tools/perf/util/auxtrace.h7
-rw-r--r--tools/perf/util/bpf_lock_contention.c183
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c104
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h4
-rw-r--r--tools/perf/util/branch.c15
-rw-r--r--tools/perf/util/branch.h2
-rw-r--r--tools/perf/util/cacheline.h25
-rw-r--r--tools/perf/util/cgroup.c1
-rw-r--r--tools/perf/util/cputopo.c9
-rw-r--r--tools/perf/util/cs-etm-base.c34
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c71
-rw-r--r--tools/perf/util/cs-etm.c95
-rw-r--r--tools/perf/util/cs-etm.h16
-rw-r--r--tools/perf/util/data-convert-bt.c4
-rw-r--r--tools/perf/util/debug.c15
-rw-r--r--tools/perf/util/evsel.c21
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/evswitch.h4
-rw-r--r--tools/perf/util/expr.c5
-rw-r--r--tools/perf/util/expr.h1
-rw-r--r--tools/perf/util/expr.l8
-rw-r--r--tools/perf/util/intel-pt.c73
-rw-r--r--tools/perf/util/llvm-utils.c25
-rw-r--r--tools/perf/util/lock-contention.h7
-rw-r--r--tools/perf/util/metricgroup.c207
-rw-r--r--tools/perf/util/metricgroup.h4
-rw-r--r--tools/perf/util/parse-events.c2
-rw-r--r--tools/perf/util/pfm.c6
-rw-r--r--tools/perf/util/pmu-hybrid.c27
-rw-r--r--tools/perf/util/pmu.c227
-rw-r--r--tools/perf/util/pmu.h21
-rw-r--r--tools/perf/util/print-events.c32
-rw-r--r--tools/perf/util/print-events.h3
-rw-r--r--tools/perf/util/probe-event.c4
-rw-r--r--tools/perf/util/python.c19
-rw-r--r--tools/perf/util/sample.h5
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c4
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c4
-rw-r--r--tools/perf/util/session.c14
-rw-r--r--tools/perf/util/sort.c20
-rw-r--r--tools/perf/util/sort.h3
-rw-r--r--tools/perf/util/stat-display.c51
-rw-r--r--tools/perf/util/stat-shadow.c114
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/symbol-elf.c527
-rw-r--r--tools/perf/util/symbol.c26
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/perf/util/symsrc.h1
-rw-r--r--tools/perf/util/synthetic-events.c4
-rw-r--r--tools/perf/util/trace-event.h19
55 files changed, 1378 insertions, 729 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 79b9498886a2..918b501f9bd8 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -154,7 +154,10 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
+
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
+endif
ifeq ($(CONFIG_LIBTRACEEVENT),y)
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 091987dd3966..40dcedfd75cd 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -68,7 +68,11 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
/* Clean highest byte */
payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
} else {
- pr_err("unsupported address packet index: 0x%x\n", index);
+ static u32 seen_idx = 0;
+ if (!(seen_idx & BIT(index))) {
+ seen_idx |= BIT(index);
+ pr_warning("ignoring unsupported address packet index: 0x%x\n", index);
+ }
}
return payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 2f311189c6e8..fed4741f372e 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -422,16 +422,18 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
int ch, pat;
u64 payload = packet->payload;
int err = 0;
+ static const char *idx_name[] = {"PC", "TGT", "VA", "PA", "PBT"};
switch (idx) {
case SPE_ADDR_PKT_HDR_INDEX_INS:
case SPE_ADDR_PKT_HDR_INDEX_BRANCH:
+ case SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH:
ns = !!SPE_ADDR_PKT_GET_NS(payload);
el = SPE_ADDR_PKT_GET_EL(payload);
payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
arm_spe_pkt_out_string(&err, &buf, &buf_len,
"%s 0x%llx el%d ns=%d",
- (idx == 1) ? "TGT" : "PC", payload, el, ns);
+ idx_name[idx], payload, el, ns);
break;
case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT:
arm_spe_pkt_out_string(&err, &buf, &buf_len,
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 9b970e7bf1e2..f75ed3a8a050 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -65,6 +65,7 @@ struct arm_spe_pkt {
#define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1
#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2
#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3
+#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH 0x4
/* Address packet payload */
#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c2e323cd7d49..498ff7f24463 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1133,6 +1133,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
if (auxtrace__dont_decode(session))
return 0;
+ if (perf_data__is_pipe(session->data))
+ return 0;
+
if (!session->auxtrace || !session->auxtrace->queue_data)
return -EINVAL;
@@ -1391,6 +1394,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->calls = true;
} else {
synth_opts->instructions = true;
+ synth_opts->cycles = true;
synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
}
@@ -1479,7 +1483,11 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
for (p = str; *p;) {
switch (*p++) {
case 'i':
- synth_opts->instructions = true;
+ case 'y':
+ if (p[-1] == 'y')
+ synth_opts->cycles = true;
+ else
+ synth_opts->instructions = true;
while (*p == ' ' || *p == ',')
p += 1;
if (isdigit(*p)) {
@@ -1638,7 +1646,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
}
}
out:
- if (synth_opts->instructions) {
+ if (synth_opts->instructions || synth_opts->cycles) {
if (!period_type_set)
synth_opts->period_type =
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 2cf63d377831..29eb82dff574 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -71,6 +71,9 @@ enum itrace_period_type {
* @inject: indicates the event (not just the sample) must be fully synthesized
* because 'perf inject' will write it out
* @instructions: whether to synthesize 'instructions' events
+ * @cycles: whether to synthesize 'cycles' events
+ * (not fully accurate, since CYC packets are only emitted
+ * together with other events, such as branches)
* @branches: whether to synthesize 'branches' events
* (branch misses only for Arm SPE)
* @transactions: whether to synthesize events for transactions
@@ -119,6 +122,7 @@ struct itrace_synth_opts {
bool default_no_sample;
bool inject;
bool instructions;
+ bool cycles;
bool branches;
bool transactions;
bool ptwrites;
@@ -643,6 +647,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
#define ITRACE_HELP \
" i[period]: synthesize instructions events\n" \
+" y[period]: synthesize cycles events (same period as i)\n" \
" b: synthesize branches events (branch misses for Arm SPE)\n" \
" c: synthesize branches events (calls only)\n" \
" r: synthesize branches events (returns only)\n" \
@@ -674,7 +679,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
" A: approximate IPC\n" \
" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
-" concatenate multiple options. Default is ibxwpe or cewp\n"
+" concatenate multiple options. Default is iybxwpe or cewp\n"
static inline
void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 0236334fd69b..fadcacb9d501 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -34,13 +34,15 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
- if (con->aggr_mode == LOCK_AGGR_TASK) {
+ if (con->aggr_mode == LOCK_AGGR_TASK)
bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
- bpf_map__set_max_entries(skel->maps.stacks, 1);
- } else {
+ else
bpf_map__set_max_entries(skel->maps.task_data, 1);
+
+ if (con->save_callstack)
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
- }
+ else
+ bpf_map__set_max_entries(skel->maps.stacks, 1);
if (target__has_cpu(target))
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
@@ -146,6 +148,8 @@ int lock_contention_prepare(struct lock_contention *con)
/* these don't work well if in the rodata section */
skel->bss->stack_skip = con->stack_skip;
skel->bss->aggr_mode = con->aggr_mode;
+ skel->bss->needs_callstack = con->save_callstack;
+ skel->bss->lock_owner = con->owner;
lock_contention_bpf__attach(skel);
return 0;
@@ -163,9 +167,70 @@ int lock_contention_stop(void)
return 0;
}
+static const char *lock_contention_get_name(struct lock_contention *con,
+ struct contention_key *key,
+ u64 *stack_trace)
+{
+ int idx = 0;
+ u64 addr;
+ const char *name = "";
+ static char name_buf[KSYM_NAME_LEN];
+ struct symbol *sym;
+ struct map *kmap;
+ struct machine *machine = con->machine;
+
+ if (con->aggr_mode == LOCK_AGGR_TASK) {
+ struct contention_task_data task;
+ int pid = key->pid;
+ int task_fd = bpf_map__fd(skel->maps.task_data);
+
+ /* do not update idle comm which contains CPU number */
+ if (pid) {
+ struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
+
+ if (t == NULL)
+ return name;
+ if (!bpf_map_lookup_elem(task_fd, &pid, &task) &&
+ thread__set_comm(t, task.comm, /*timestamp=*/0))
+ name = task.comm;
+ }
+ return name;
+ }
+
+ if (con->aggr_mode == LOCK_AGGR_ADDR) {
+ sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap);
+ if (sym)
+ name = sym->name;
+ return name;
+ }
+
+ /* LOCK_AGGR_CALLER: skip lock internal functions */
+ while (machine__is_lock_function(machine, stack_trace[idx]) &&
+ idx < con->max_stack - 1)
+ idx++;
+
+ addr = stack_trace[idx];
+ sym = machine__find_kernel_symbol(machine, addr, &kmap);
+
+ if (sym) {
+ unsigned long offset;
+
+ offset = kmap->map_ip(kmap, addr) - sym->start;
+
+ if (offset == 0)
+ return sym->name;
+
+ snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
+ } else {
+ snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
+ }
+
+ return name_buf;
+}
+
int lock_contention_read(struct lock_contention *con)
{
- int fd, stack, task_fd, err = 0;
+ int fd, stack, err = 0;
struct contention_key *prev_key, key;
struct contention_data data = {};
struct lock_stat *st = NULL;
@@ -175,7 +240,6 @@ int lock_contention_read(struct lock_contention *con)
fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
- task_fd = bpf_map__fd(skel->maps.task_data);
con->lost = skel->bss->lost;
@@ -195,98 +259,75 @@ int lock_contention_read(struct lock_contention *con)
prev_key = NULL;
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
- struct map *kmap;
- struct symbol *sym;
- int idx = 0;
- s32 stack_id;
+ s64 ls_key;
+ const char *name;
/* to handle errors in the loop body */
err = -1;
bpf_map_lookup_elem(fd, &key, &data);
- st = zalloc(sizeof(*st));
- if (st == NULL)
- break;
-
- st->nr_contended = data.count;
- st->wait_time_total = data.total_time;
- st->wait_time_max = data.max_time;
- st->wait_time_min = data.min_time;
-
- if (data.count)
- st->avg_wait_time = data.total_time / data.count;
-
- st->flags = data.flags;
- st->addr = key.aggr_key;
+ if (con->save_callstack) {
+ bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
- if (con->aggr_mode == LOCK_AGGR_TASK) {
- struct contention_task_data task;
- struct thread *t;
- int pid = key.aggr_key;
+ if (!match_callstack_filter(machine, stack_trace))
+ goto next;
+ }
- /* do not update idle comm which contains CPU number */
- if (st->addr) {
- bpf_map_lookup_elem(task_fd, &pid, &task);
- t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
- thread__set_comm(t, task.comm, /*timestamp=*/0);
- }
+ switch (con->aggr_mode) {
+ case LOCK_AGGR_CALLER:
+ ls_key = key.stack_id;
+ break;
+ case LOCK_AGGR_TASK:
+ ls_key = key.pid;
+ break;
+ case LOCK_AGGR_ADDR:
+ ls_key = key.lock_addr;
+ break;
+ default:
goto next;
}
- if (con->aggr_mode == LOCK_AGGR_ADDR) {
- sym = machine__find_kernel_symbol(machine, st->addr, &kmap);
- if (sym)
- st->name = strdup(sym->name);
+ st = lock_stat_find(ls_key);
+ if (st != NULL) {
+ st->wait_time_total += data.total_time;
+ if (st->wait_time_max < data.max_time)
+ st->wait_time_max = data.max_time;
+ if (st->wait_time_min > data.min_time)
+ st->wait_time_min = data.min_time;
+
+ st->nr_contended += data.count;
+ if (st->nr_contended)
+ st->avg_wait_time = st->wait_time_total / st->nr_contended;
goto next;
}
- stack_id = key.aggr_key;
- bpf_map_lookup_elem(stack, &stack_id, stack_trace);
-
- /* skip lock internal functions */
- while (machine__is_lock_function(machine, stack_trace[idx]) &&
- idx < con->max_stack - 1)
- idx++;
-
- st->addr = stack_trace[idx];
- sym = machine__find_kernel_symbol(machine, st->addr, &kmap);
-
- if (sym) {
- unsigned long offset;
- int ret = 0;
-
- offset = kmap->map_ip(kmap, st->addr) - sym->start;
+ name = lock_contention_get_name(con, &key, stack_trace);
+ st = lock_stat_findnew(ls_key, name, data.flags);
+ if (st == NULL)
+ break;
- if (offset)
- ret = asprintf(&st->name, "%s+%#lx", sym->name, offset);
- else
- st->name = strdup(sym->name);
+ st->nr_contended = data.count;
+ st->wait_time_total = data.total_time;
+ st->wait_time_max = data.max_time;
+ st->wait_time_min = data.min_time;
- if (ret < 0 || st->name == NULL)
- break;
- } else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) {
- break;
- }
+ if (data.count)
+ st->avg_wait_time = data.total_time / data.count;
- if (verbose > 0) {
+ if (con->save_callstack) {
st->callstack = memdup(stack_trace, stack_size);
if (st->callstack == NULL)
break;
}
+
next:
- hlist_add_head(&st->hash_entry, con->result);
prev_key = &key;
- /* we're fine now, reset the values */
- st = NULL;
+ /* we're fine now, reset the error */
err = 0;
}
free(stack_trace);
- if (st) {
- free(st->name);
- free(st);
- }
return err;
}
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index ad0ca5d50557..e6007eaeda1a 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -10,6 +10,14 @@
/* default buffer size */
#define MAX_ENTRIES 10240
+/* lock contention flags from include/trace/events/lock.h */
+#define LCB_F_SPIN (1U << 0)
+#define LCB_F_READ (1U << 1)
+#define LCB_F_WRITE (1U << 2)
+#define LCB_F_RT (1U << 3)
+#define LCB_F_PERCPU (1U << 4)
+#define LCB_F_MUTEX (1U << 5)
+
struct tstamp_data {
__u64 timestamp;
__u64 lock;
@@ -76,13 +84,23 @@ struct {
__uint(max_entries, 1);
} addr_filter SEC(".maps");
+struct rw_semaphore___old {
+ struct task_struct *owner;
+} __attribute__((preserve_access_index));
+
+struct rw_semaphore___new {
+ atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
/* control flags */
int enabled;
int has_cpu;
int has_task;
int has_type;
int has_addr;
+int needs_callstack;
int stack_skip;
+int lock_owner;
/* determine the key of lock stat */
int aggr_mode;
@@ -131,17 +149,59 @@ static inline int can_record(u64 *ctx)
return 1;
}
-static inline void update_task_data(__u32 pid)
+static inline int update_task_data(struct task_struct *task)
{
struct contention_task_data *p;
+ int pid, err;
+
+ err = bpf_core_read(&pid, sizeof(pid), &task->pid);
+ if (err)
+ return -1;
p = bpf_map_lookup_elem(&task_data, &pid);
if (p == NULL) {
- struct contention_task_data data;
+ struct contention_task_data data = {};
- bpf_get_current_comm(data.comm, sizeof(data.comm));
+ BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
}
+
+ return 0;
+}
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
+{
+ struct task_struct *task;
+ __u64 owner = 0;
+
+ if (flags & LCB_F_MUTEX) {
+ struct mutex *mutex = (void *)lock;
+ owner = BPF_CORE_READ(mutex, owner.counter);
+ } else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
+#if __has_builtin(bpf_core_type_matches)
+ if (bpf_core_type_matches(struct rw_semaphore___old)) {
+ struct rw_semaphore___old *rwsem = (void *)lock;
+ owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
+ } else if (bpf_core_type_matches(struct rw_semaphore___new)) {
+ struct rw_semaphore___new *rwsem = (void *)lock;
+ owner = BPF_CORE_READ(rwsem, owner.counter);
+ }
+#else
+ /* assume new struct */
+ struct rw_semaphore *rwsem = (void *)lock;
+ owner = BPF_CORE_READ(rwsem, owner.counter);
+#endif
+ }
+
+ if (!owner)
+ return NULL;
+
+ task = (void *)(owner & ~7UL);
+ return task;
}
SEC("tp_btf/contention_begin")
@@ -173,11 +233,31 @@ int contention_begin(u64 *ctx)
pelem->lock = (__u64)ctx[0];
pelem->flags = (__u32)ctx[1];
- if (aggr_mode == LOCK_AGGR_CALLER) {
+ if (needs_callstack) {
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
lost++;
+ } else if (aggr_mode == LOCK_AGGR_TASK) {
+ struct task_struct *task;
+
+ if (lock_owner) {
+ task = get_lock_owner(pelem->lock, pelem->flags);
+
+ /* The flags is not used anymore. Pass the owner pid. */
+ if (task)
+ pelem->flags = BPF_CORE_READ(task, pid);
+ else
+ pelem->flags = -1U;
+
+ } else {
+ task = bpf_get_current_task_btf();
+ }
+
+ if (task) {
+ if (update_task_data(task) < 0 && lock_owner)
+ pelem->flags = -1U;
+ }
}
return 0;
@@ -188,7 +268,7 @@ int contention_end(u64 *ctx)
{
__u32 pid;
struct tstamp_data *pelem;
- struct contention_key key;
+ struct contention_key key = {};
struct contention_data *data;
__u64 duration;
@@ -204,14 +284,20 @@ int contention_end(u64 *ctx)
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
- key.aggr_key = pelem->stack_id;
+ key.stack_id = pelem->stack_id;
break;
case LOCK_AGGR_TASK:
- key.aggr_key = pid;
- update_task_data(pid);
+ if (lock_owner)
+ key.pid = pelem->flags;
+ else
+ key.pid = pid;
+ if (needs_callstack)
+ key.stack_id = pelem->stack_id;
break;
case LOCK_AGGR_ADDR:
- key.aggr_key = pelem->lock;
+ key.lock_addr = pelem->lock;
+ if (needs_callstack)
+ key.stack_id = pelem->stack_id;
break;
default:
/* should not happen */
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index ce71cf1a7e1e..3d35fd4407ac 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -4,7 +4,9 @@
#define UTIL_BPF_SKEL_LOCK_DATA_H
struct contention_key {
- u64 aggr_key; /* can be stack_id, pid or lock addr */
+ u32 stack_id;
+ u32 pid;
+ u64 lock_addr;
};
#define TASK_COMM_LEN 16
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index 6d38238481d3..378f16a24751 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -212,3 +212,18 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
return printed;
}
+
+const char *branch_spec_desc(int spec)
+{
+ const char *branch_spec_outcomes[PERF_BR_SPEC_MAX] = {
+ "N/A",
+ "SPEC_WRONG_PATH",
+ "NON_SPEC_CORRECT_PATH",
+ "SPEC_CORRECT_PATH",
+ };
+
+ if (spec >= 0 && spec < PERF_BR_SPEC_MAX)
+ return branch_spec_outcomes[spec];
+
+ return NULL;
+}
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 3ed792db1125..e41bfffe2217 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -89,4 +89,6 @@ const char *get_branch_type(struct branch_entry *e);
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
+const char *branch_spec_desc(int spec);
+
#endif /* _PERF_BRANCH_H */
diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h
index dec8c0fb1f4a..fe6d5b60a031 100644
--- a/tools/perf/util/cacheline.h
+++ b/tools/perf/util/cacheline.h
@@ -6,16 +6,31 @@
int __pure cacheline_size(void);
-static inline u64 cl_address(u64 address)
+
+/*
+ * Some architectures have 'Adjacent Cacheline Prefetch' feature,
+ * which performs like the cacheline size being doubled.
+ */
+static inline u64 cl_address(u64 address, bool double_cl)
{
+ u64 size = cacheline_size();
+
+ if (double_cl)
+ size *= 2;
+
/* return the cacheline of the address */
- return (address & ~(cacheline_size() - 1));
+ return (address & ~(size - 1));
}
-static inline u64 cl_offset(u64 address)
+static inline u64 cl_offset(u64 address, bool double_cl)
{
- /* return the cacheline of the address */
- return (address & (cacheline_size() - 1));
+ u64 size = cacheline_size();
+
+ if (double_cl)
+ size *= 2;
+
+ /* return the offset inside cacheline */
+ return (address & (size - 1));
}
#endif // PERF_CACHELINE_H
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index cd978c240e0d..bfb13306d82c 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -481,7 +481,6 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
nr_cgroups++;
if (metric_events) {
- perf_stat__collect_metric_expr(tmp_list);
if (metricgroup__copy_metric_events(tmp_list, cgrp,
metric_events,
&orig_metric_events) < 0)
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 1a3ff6449158..e08797c3cdbc 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -422,8 +422,6 @@ void numa_topology__delete(struct numa_topology *tp)
static int load_hybrid_node(struct hybrid_topology_node *node,
struct perf_pmu *pmu)
{
- const char *sysfs;
- char path[PATH_MAX];
char *buf = NULL, *p;
FILE *fp;
size_t len = 0;
@@ -432,12 +430,7 @@ static int load_hybrid_node(struct hybrid_topology_node *node,
if (!node->pmu_name)
return -1;
- sysfs = sysfs__mountpoint();
- if (!sysfs)
- goto err;
-
- snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name);
- fp = fopen(path, "r");
+ fp = perf_pmu__open_file(pmu, "cpus");
if (!fp)
goto err;
diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c
index 597542410854..5f48b756c4cf 100644
--- a/tools/perf/util/cs-etm-base.c
+++ b/tools/perf/util/cs-etm-base.c
@@ -36,7 +36,22 @@ static const char * const cs_etmv4_priv_fmts[] = {
[CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n",
[CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n",
[CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
- [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n"
+ [CS_ETMV4_TS_SOURCE] = " TS_SOURCE %lld\n",
+};
+
+static const char * const cs_ete_priv_fmts[] = {
+ [CS_ETM_MAGIC] = " Magic number %llx\n",
+ [CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
+ [CS_ETE_TRCCONFIGR] = " TRCCONFIGR %llx\n",
+ [CS_ETE_TRCTRACEIDR] = " TRCTRACEIDR %llx\n",
+ [CS_ETE_TRCIDR0] = " TRCIDR0 %llx\n",
+ [CS_ETE_TRCIDR1] = " TRCIDR1 %llx\n",
+ [CS_ETE_TRCIDR2] = " TRCIDR2 %llx\n",
+ [CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n",
+ [CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
+ [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n",
+ [CS_ETE_TS_SOURCE] = " TS_SOURCE %lld\n",
};
static const char * const param_unk_fmt =
@@ -96,19 +111,22 @@ static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset)
else
fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
}
- } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
- /*
- * ETE and ETMv4 can be printed in the same block because the number of parameters
- * is saved and they share the list of parameter names. ETE is also only supported
- * in V1 files.
- */
+ } else if (magic == __perf_cs_etmv4_magic) {
for (j = 0; j < total_params; j++, i++) {
/* if newer record - could be excess params */
- if (j >= CS_ETE_PRIV_MAX)
+ if (j >= CS_ETMV4_PRIV_MAX)
fprintf(stdout, param_unk_fmt, j, val[i]);
else
fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
}
+ } else if (magic == __perf_cs_ete_magic) {
+ for (j = 0; j < total_params; j++, i++) {
+ /* if newer record - could be excess params */
+ if (j >= CS_ETE_PRIV_MAX)
+ fprintf(stdout, param_unk_fmt, j, val[i]);
+ else
+ fprintf(stdout, cs_ete_priv_fmts[j], val[i]);
+ }
} else {
/* failure - note bad magic value and error out */
fprintf(stdout, magic_unk_fmt, magic);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 31fa3b45134a..d0e521dfcf35 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -30,6 +30,15 @@
#endif
#endif
+/*
+ * Assume a maximum of 0.1ns elapsed per instruction. This would be the
+ * case with a theoretical 10GHz core executing 1 instruction per cycle.
+ * Used to estimate the sample time for synthesized instructions because
+ * Coresight only emits a timestamp for a range of instructions rather
+ * than per instruction.
+ */
+const u32 INSTR_PER_NS = 10;
+
struct cs_etm_decoder {
void *data;
void (*packet_printer)(const char *msg);
@@ -112,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
return 1;
}
+/*
+ * Calculate the number of nanoseconds elapsed.
+ *
+ * instr_count is updated in place with the remainder of the instructions
+ * which didn't make up a whole nanosecond.
+ */
+static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count)
+{
+ const u32 instr_copy = *instr_count;
+
+ *instr_count %= INSTR_PER_NS;
+ return instr_copy / INSTR_PER_NS;
+}
+
static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
ocsd_etmv3_cfg *config)
{
@@ -260,15 +283,17 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
struct cs_etm_packet_queue *packet_queue,
const uint8_t trace_chan_id)
{
+ u64 estimated_ts;
+
/* No timestamp packet has been received, nothing to do */
- if (!packet_queue->cs_timestamp)
+ if (!packet_queue->next_cs_timestamp)
return OCSD_RESP_CONT;
- packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
+ estimated_ts = packet_queue->cs_timestamp +
+ cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count);
- /* Estimate the timestamp for the next range packet */
- packet_queue->next_cs_timestamp += packet_queue->instr_count;
- packet_queue->instr_count = 0;
+ /* Estimated TS can never be higher than the next real one in the trace */
+ packet_queue->cs_timestamp = min(packet_queue->next_cs_timestamp, estimated_ts);
/* Tell the front end which traceid_queue needs attention */
cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
@@ -283,6 +308,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
const ocsd_trc_index_t indx)
{
struct cs_etm_packet_queue *packet_queue;
+ u64 converted_timestamp;
+ u64 estimated_first_ts;
/* First get the packet queue for this traceID */
packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
@@ -290,17 +317,28 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
return OCSD_RESP_FATAL_SYS_ERR;
/*
+ * Coresight timestamps are raw timer values which need to be scaled to ns. Assume
+ * 0 is a bad value so don't try to convert it.
+ */
+ converted_timestamp = elem->timestamp ?
+ cs_etm__convert_sample_time(etmq, elem->timestamp) : 0;
+
+ /*
* We've seen a timestamp packet before - simply record the new value.
* Function do_soft_timestamp() will report the value to the front end,
* hence asking the decoder to keep decoding rather than stopping.
*/
- if (packet_queue->cs_timestamp) {
- packet_queue->next_cs_timestamp = elem->timestamp;
+ if (packet_queue->next_cs_timestamp) {
+ /*
+ * What was next is now where new ranges start from, overwriting
+ * any previous estimate in cs_timestamp
+ */
+ packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
+ packet_queue->next_cs_timestamp = converted_timestamp;
return OCSD_RESP_CONT;
}
-
- if (!elem->timestamp) {
+ if (!converted_timestamp) {
/*
* Zero timestamps can be seen due to misconfiguration or hardware bugs.
* Warn once, and don't try to subtract instr_count as it would result in an
@@ -312,7 +350,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
indx);
- } else if (packet_queue->instr_count > elem->timestamp) {
+ } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) {
/*
* Sanity check that the elem->timestamp - packet_queue->instr_count would not
* result in an underflow. Warn and clamp at 0 if it would.
@@ -325,11 +363,14 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
* or a discontinuity. Since timestamps packets are generated *after*
* range packets have been generated, we need to estimate the time at
* which instructions started by subtracting the number of instructions
- * executed to the timestamp.
+ * executed to the timestamp. Don't estimate earlier than the last used
+ * timestamp though.
*/
- packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
+ estimated_first_ts = converted_timestamp -
+ (packet_queue->instr_count / INSTR_PER_NS);
+ packet_queue->cs_timestamp = max(packet_queue->cs_timestamp, estimated_first_ts);
}
- packet_queue->next_cs_timestamp = elem->timestamp;
+ packet_queue->next_cs_timestamp = converted_timestamp;
packet_queue->instr_count = 0;
/* Tell the front end which traceid_queue needs attention */
@@ -342,7 +383,6 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
static void
cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
{
- packet_queue->cs_timestamp = 0;
packet_queue->next_cs_timestamp = 0;
packet_queue->instr_count = 0;
}
@@ -604,6 +644,9 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_CUSTOM:
case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
case OCSD_GEN_TRC_ELEM_MEMTRANS:
+#if (OCSD_VER_NUM >= 0x010400)
+ case OCSD_GEN_TRC_ELEM_INSTRUMENTATION:
+#endif
default:
break;
}
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 33303d03c2fa..f65bac5ddbdb 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -35,6 +35,7 @@
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
+#include "tsc.h"
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"
@@ -46,10 +47,12 @@ struct cs_etm_auxtrace {
struct perf_session *session;
struct machine *machine;
struct thread *unknown_thread;
+ struct perf_tsc_conversion tc;
u8 timeless_decoding;
u8 snapshot_mode;
u8 data_queued;
+ u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
int num_cpu;
u64 latest_kernel_timestamp;
@@ -464,12 +467,12 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
u64 **metadata = etm->metadata;
t_params[idx].protocol = CS_ETM_PROTO_ETE;
- t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
- t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
- t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
- t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
- t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
- t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+ t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0];
+ t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1];
+ t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2];
+ t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8];
+ t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR];
+ t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR];
t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
}
@@ -1161,6 +1164,30 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
sample->insn_len, (void *)sample->insn);
}
+u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+
+ if (etm->has_virtual_ts)
+ return tsc_to_perf_time(cs_timestamp, &etm->tc);
+ else
+ return cs_timestamp;
+}
+
+static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
+
+ if (etm->timeless_decoding)
+ return 0;
+ else if (etm->has_virtual_ts)
+ return packet_queue->cs_timestamp;
+ else
+ return etm->latest_kernel_timestamp;
+}
+
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
struct cs_etm_traceid_queue *tidq,
u64 addr, u64 period)
@@ -1174,8 +1201,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
event->sample.header.size = sizeof(struct perf_event_header);
- if (!etm->timeless_decoding)
- sample.time = etm->latest_kernel_timestamp;
+ /* Set time field based on etm auxtrace config. */
+ sample.time = cs_etm__resolve_sample_time(etmq, tidq);
+
sample.ip = addr;
sample.pid = tidq->pid;
sample.tid = tidq->tid;
@@ -1232,8 +1260,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
event->sample.header.size = sizeof(struct perf_event_header);
- if (!etm->timeless_decoding)
- sample.time = etm->latest_kernel_timestamp;
+ /* Set time field based on etm auxtrace config. */
+ sample.time = cs_etm__resolve_sample_time(etmq, tidq);
+
sample.ip = ip;
sample.pid = tidq->pid;
sample.tid = tidq->tid;
@@ -2746,12 +2775,42 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
return 0;
}
+#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
+ (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
+
+/*
+ * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
+ * timestamps).
+ */
+static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
+{
+ int j;
+
+ for (j = 0; j < num_cpu; j++) {
+ switch (metadata[j][CS_ETM_MAGIC]) {
+ case __perf_cs_etmv4_magic:
+ if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
+ return false;
+ break;
+ case __perf_cs_ete_magic:
+ if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
+ return false;
+ break;
+ default:
+ /* Unknown / unsupported magic number. */
+ return false;
+ }
+ }
+ return true;
+}
+
int cs_etm__process_auxtrace_info_full(union perf_event *event,
struct perf_session *session)
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
struct cs_etm_auxtrace *etm = NULL;
struct int_node *inode;
+ struct perf_record_time_conv *tc = &session->time_conv;
int event_header_size = sizeof(struct perf_event_header);
int total_size = auxtrace_info->header.size;
int priv_size = 0;
@@ -2886,6 +2945,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
etm->auxtrace_type = auxtrace_info->type;
etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
+ /* Use virtual timestamps if all ETMs report ts_source = 1 */
+ etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
+
+ if (!etm->has_virtual_ts)
+ ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
+ "The time field of the samples will not be set accurately.\n\n");
+
etm->auxtrace.process_event = cs_etm__process_event;
etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
etm->auxtrace.flush_events = cs_etm__flush_events;
@@ -2915,6 +2981,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
goto err_delete_thread;
}
+ etm->tc.time_shift = tc->time_shift;
+ etm->tc.time_mult = tc->time_mult;
+ etm->tc.time_zero = tc->time_zero;
+ if (event_contains(*tc, time_cycles)) {
+ etm->tc.time_cycles = tc->time_cycles;
+ etm->tc.time_mask = tc->time_mask;
+ etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
+ etm->tc.cap_user_time_short = tc->cap_user_time_short;
+ }
err = cs_etm__synth_events(etm, session);
if (err)
goto err_delete_thread;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 5da50d5dae6b..98a4f7113d2f 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -71,6 +71,7 @@ enum {
CS_ETMV4_TRCIDR2,
CS_ETMV4_TRCIDR8,
CS_ETMV4_TRCAUTHSTATUS,
+ CS_ETMV4_TS_SOURCE,
CS_ETMV4_PRIV_MAX,
};
@@ -82,7 +83,17 @@ enum {
* added in header V1
*/
enum {
- CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX,
+ /* Dynamic, configurable parameters */
+ CS_ETE_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
+ CS_ETE_TRCTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETE_TRCIDR0,
+ CS_ETE_TRCIDR1,
+ CS_ETE_TRCIDR2,
+ CS_ETE_TRCIDR8,
+ CS_ETE_TRCAUTHSTATUS,
+ CS_ETE_TRCDEVARCH,
+ CS_ETE_TS_SOURCE,
CS_ETE_PRIV_MAX
};
@@ -181,7 +192,7 @@ struct cs_etm_packet_queue {
u32 head;
u32 tail;
u32 instr_count;
- u64 cs_timestamp;
+ u64 cs_timestamp; /* Timestamp from trace data, converted to ns if possible */
u64 next_cs_timestamp;
struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
};
@@ -220,6 +231,7 @@ struct cs_etm_packet_queue
*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
struct perf_session *session __maybe_unused);
+u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp);
#else
static inline int
cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index b842273458b8..2b732bccabad 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -322,10 +322,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
offset = tmp_val;
len = offset >> 16;
offset &= 0xffff;
-#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
- if (flags & TEP_FIELD_IS_RELATIVE)
+ if (tep_field_is_relative(flags))
offset += fmtf->offset + fmtf->size;
-#endif
}
if (flags & TEP_FIELD_IS_ARRAY) {
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 190e818a0717..88378c4c5dd9 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -19,12 +19,19 @@
#include "debug.h"
#include "print_binary.h"
#include "target.h"
+#include "trace-event.h"
#include "ui/helpline.h"
#include "ui/ui.h"
#include "util/parse-sublevel-options.h"
#include <linux/ctype.h>
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#else
+#define LIBTRACEEVENT_VERSION 0
+#endif
+
int verbose;
int debug_peo_args;
bool dump_trace = false, quiet = false;
@@ -228,6 +235,14 @@ int perf_debug_option(const char *str)
/* Allow only verbose value in range (0, 10), otherwise set 0. */
verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
+#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0)
+ if (verbose == 1)
+ tep_set_loglevel(TEP_LOG_INFO);
+ else if (verbose == 2)
+ tep_set_loglevel(TEP_LOG_DEBUG);
+ else if (verbose >= 3)
+ tep_set_loglevel(TEP_LOG_ALL);
+#endif
return 0;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 999dd1700502..51e8ce6edddc 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -285,8 +285,6 @@ void evsel__init(struct evsel *evsel,
evsel->sample_size = __evsel__sample_size(attr->sample_type);
evsel__calc_id_pos(evsel);
evsel->cmdline_group_boundary = false;
- evsel->metric_expr = NULL;
- evsel->metric_name = NULL;
evsel->metric_events = NULL;
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
@@ -2319,7 +2317,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
* abort:1 //transaction abort
* cycles:16 //cycle count to last branch
* type:4 //branch type
- * reserved:40
+ * spec:2 //branch speculation info
+ * new_type:4 //additional branch type
+ * priv:3 //privilege level
+ * reserved:31
* }
* }
*
@@ -2335,7 +2336,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
new_val |= bitfield_swap(value, 3, 1);
new_val |= bitfield_swap(value, 4, 16);
new_val |= bitfield_swap(value, 20, 4);
- new_val |= bitfield_swap(value, 24, 40);
+ new_val |= bitfield_swap(value, 24, 2);
+ new_val |= bitfield_swap(value, 26, 4);
+ new_val |= bitfield_swap(value, 30, 3);
+ new_val |= bitfield_swap(value, 33, 31);
} else {
new_val = bitfield_swap(value, 63, 1);
new_val |= bitfield_swap(value, 62, 1);
@@ -2343,7 +2347,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
new_val |= bitfield_swap(value, 60, 1);
new_val |= bitfield_swap(value, 44, 16);
new_val |= bitfield_swap(value, 40, 4);
- new_val |= bitfield_swap(value, 0, 40);
+ new_val |= bitfield_swap(value, 38, 2);
+ new_val |= bitfield_swap(value, 34, 4);
+ new_val |= bitfield_swap(value, 31, 3);
+ new_val |= bitfield_swap(value, 0, 31);
}
return new_val;
@@ -2784,10 +2791,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(sample->raw_data + field->offset);
offset &= 0xffff;
-#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
- if (field->flags & TEP_FIELD_IS_RELATIVE)
+ if (tep_field_is_relative(field->flags))
offset += field->offset + field->size;
-#endif
}
return sample->raw_data + offset;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d572be41b960..24cb807ef6ce 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -105,8 +105,6 @@ struct evsel {
* metric fields are similar, but needs more care as they can have
* references to other metric (evsel).
*/
- const char * metric_expr;
- const char * metric_name;
struct evsel **metric_events;
struct evsel *metric_leader;
diff --git a/tools/perf/util/evswitch.h b/tools/perf/util/evswitch.h
index fd30460b6218..8ffdbe526d98 100644
--- a/tools/perf/util/evswitch.h
+++ b/tools/perf/util/evswitch.h
@@ -22,9 +22,9 @@ bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel);
#define OPTS_EVSWITCH(evswitch) \
OPT_STRING(0, "switch-on", &(evswitch)->on_name, \
- "event", "Consider events after the ocurrence of this event"), \
+ "event", "Consider events after the occurrence of this event"), \
OPT_STRING(0, "switch-off", &(evswitch)->off_name, \
- "event", "Stop considering events after the ocurrence of this event"), \
+ "event", "Stop considering events after the occurrence of this event"), \
OPT_BOOLEAN(0, "show-on-off-events", &(evswitch)->show_on_off_events, \
"Show the on/off switch events, used with --switch-on and --switch-off")
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 00dcde35e0d3..c1da20b868db 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -19,6 +19,7 @@
#include <linux/zalloc.h>
#include <ctype.h>
#include <math.h>
+#include "pmu.h"
#ifdef PARSER_DEBUG
extern int expr_debug;
@@ -448,6 +449,10 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
result = topology->core_cpus_lists;
goto out;
}
+ if (!strcmp("#slots", literal)) {
+ result = perf_pmu__cpu_slots_per_cycle();
+ goto out;
+ }
pr_err("Unrecognized literal '%s'", literal);
out:
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 029271540fb0..eaa44b24c555 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -9,6 +9,7 @@ struct expr_scanner_ctx {
char *user_requested_cpu_list;
int runtime;
bool system_wide;
+ bool is_test;
};
struct expr_parse_ctx {
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index d47de5f270a8..4fbf353e78e7 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -87,9 +87,11 @@ static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
YYSTYPE *yylval = expr_get_lval(scanner);
yylval->num = expr__get_literal(expr_get_text(scanner), sctx);
- if (isnan(yylval->num))
- return EXPR_ERROR;
-
+ if (isnan(yylval->num)) {
+ if (!sctx->is_test)
+ return EXPR_ERROR;
+ yylval->num = 1;
+ }
return LITERAL;
}
%}
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 6d3921627e33..955c1b9dc6a4 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -5,6 +5,7 @@
*/
#include <inttypes.h>
+#include <linux/perf_event.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
@@ -98,6 +99,10 @@ struct intel_pt {
u64 instructions_sample_type;
u64 instructions_id;
+ bool sample_cycles;
+ u64 cycles_sample_type;
+ u64 cycles_id;
+
bool sample_branches;
u32 branches_filter;
u64 branches_sample_type;
@@ -214,6 +219,8 @@ struct intel_pt_queue {
u64 ipc_cyc_cnt;
u64 last_in_insn_cnt;
u64 last_in_cyc_cnt;
+ u64 last_cy_insn_cnt;
+ u64 last_cy_cyc_cnt;
u64 last_br_insn_cnt;
u64 last_br_cyc_cnt;
unsigned int cbr_seen;
@@ -1319,7 +1326,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
- if (pt->synth_opts.instructions) {
+ if (pt->synth_opts.instructions || pt->synth_opts.cycles) {
if (pt->synth_opts.period) {
switch (pt->synth_opts.period_type) {
case PERF_ITRACE_PERIOD_INSTRUCTIONS:
@@ -1830,6 +1837,33 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
pt->instructions_sample_type);
}
+static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ u64 period = 0;
+
+ if (ptq->sample_ipc)
+ period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
+
+ if (!period || intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->cycles_id;
+ sample.stream_id = ptq->pt->cycles_id;
+ sample.period = period;
+
+ sample.cyc_cnt = period;
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt;
+ ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
+
+ return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+}
+
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
@@ -2598,10 +2632,17 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
}
}
- if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
- err = intel_pt_synth_instruction_sample(ptq);
- if (err)
- return err;
+ if (state->type & INTEL_PT_INSTRUCTION) {
+ if (pt->sample_instructions) {
+ err = intel_pt_synth_instruction_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (pt->sample_cycles) {
+ err = intel_pt_synth_cycle_sample(ptq);
+ if (err)
+ return err;
+ }
}
if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
@@ -3731,6 +3772,22 @@ static int intel_pt_synth_events(struct intel_pt *pt,
id += 1;
}
+ if (pt->synth_opts.cycles) {
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+ attr.sample_period =
+ intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+ else
+ attr.sample_period = pt->synth_opts.period;
+ err = intel_pt_synth_event(session, "cycles", &attr, id);
+ if (err)
+ return err;
+ pt->sample_cycles = true;
+ pt->cycles_sample_type = attr.sample_type;
+ pt->cycles_id = id;
+ id += 1;
+ }
+
attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
attr.sample_period = 1;
@@ -4379,6 +4436,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_setup_pebs_events(pt);
+ if (perf_data__is_pipe(session->data)) {
+ pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n"
+ " The output cannot relied upon. In particular,\n"
+ " timestamps and the order of events may be incorrect.\n");
+ }
+
if (pt->sampling_mode || list_empty(&session->auxtrace_index))
err = auxtrace_queue_data(session, true, true);
else
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 650ffe336f3a..4e8e243a6e4b 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -531,14 +531,37 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
pr_debug("llvm compiling command template: %s\n", template);
+ /*
+ * Below, substitute control characters for values that can cause the
+ * echo to misbehave, then substitute the values back.
+ */
err = -ENOMEM;
- if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0)
+ if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0)
goto errout;
+#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0)
+ for (char *p = command_echo; *p; p++) {
+ SWAP_CHAR('<', '\001');
+ SWAP_CHAR('>', '\002');
+ SWAP_CHAR('"', '\003');
+ SWAP_CHAR('\'', '\004');
+ SWAP_CHAR('|', '\005');
+ SWAP_CHAR('&', '\006');
+ SWAP_CHAR('\a', '"');
+ }
err = read_from_pipe(command_echo, (void **) &command_out, NULL);
if (err)
goto errout;
+ for (char *p = command_out; *p; p++) {
+ SWAP_CHAR('\001', '<');
+ SWAP_CHAR('\002', '>');
+ SWAP_CHAR('\003', '"');
+ SWAP_CHAR('\004', '\'');
+ SWAP_CHAR('\005', '|');
+ SWAP_CHAR('\006', '&');
+ }
+#undef SWAP_CHAR
pr_debug("llvm compiling command : %s\n", command_out);
err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index b99e83fccf5c..040b618b2215 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -65,6 +65,11 @@ struct lock_stat {
*/
#define MAX_LOCK_DEPTH 48
+struct lock_stat *lock_stat_find(u64 addr);
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack);
+
/*
* struct lock_seq_stat:
* Place to put on state of one lock sequence
@@ -128,6 +133,8 @@ struct lock_contention {
int max_stack;
int stack_skip;
int aggr_mode;
+ int owner;
+ bool save_callstack;
};
#ifdef HAVE_BPF_SKEL
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index b9c273ed080a..f3559be95541 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -167,14 +167,14 @@ static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
}
-static bool metricgroup__has_constraint(const struct pmu_event *pe)
+static bool metricgroup__has_constraint(const struct pmu_metric *pm)
{
- if (!pe->metric_constraint)
+ if (!pm->metric_constraint)
return false;
- if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") &&
+ if (!strcmp(pm->metric_constraint, "NO_NMI_WATCHDOG") &&
sysctl__nmi_watchdog_enabled()) {
- metricgroup___watchdog_constraint_hint(pe->metric_name, false);
+ metricgroup___watchdog_constraint_hint(pm->metric_name, false);
return true;
}
@@ -193,7 +193,7 @@ static void metric__free(struct metric *m)
free(m);
}
-static struct metric *metric__new(const struct pmu_event *pe,
+static struct metric *metric__new(const struct pmu_metric *pm,
const char *modifier,
bool metric_no_group,
int runtime,
@@ -210,15 +210,15 @@ static struct metric *metric__new(const struct pmu_event *pe,
if (!m->pctx)
goto out_err;
- m->metric_name = pe->metric_name;
+ m->metric_name = pm->metric_name;
m->modifier = NULL;
if (modifier) {
m->modifier = strdup(modifier);
if (!m->modifier)
goto out_err;
}
- m->metric_expr = pe->metric_expr;
- m->metric_unit = pe->unit;
+ m->metric_expr = pm->metric_expr;
+ m->metric_unit = pm->unit;
m->pctx->sctx.user_requested_cpu_list = NULL;
if (user_requested_cpu_list) {
m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list);
@@ -227,7 +227,7 @@ static struct metric *metric__new(const struct pmu_event *pe,
}
m->pctx->sctx.runtime = runtime;
m->pctx->sctx.system_wide = system_wide;
- m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+ m->has_constraint = metric_no_group || metricgroup__has_constraint(pm);
m->metric_refs = NULL;
m->evlist = NULL;
@@ -348,10 +348,10 @@ static bool match_metric(const char *n, const char *list)
return false;
}
-static bool match_pe_metric(const struct pmu_event *pe, const char *metric)
+static bool match_pm_metric(const struct pmu_metric *pm, const char *metric)
{
- return match_metric(pe->metric_group, metric) ||
- match_metric(pe->metric_name, metric);
+ return match_metric(pm->metric_group, metric) ||
+ match_metric(pm->metric_name, metric);
}
/** struct mep - RB-tree node for building printing information. */
@@ -420,13 +420,13 @@ static struct mep *mep_lookup(struct rblist *groups, const char *metric_group,
return NULL;
}
-static int metricgroup__add_to_mep_groups(const struct pmu_event *pe,
+static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
struct rblist *groups)
{
const char *g;
char *omg, *mg;
- mg = strdup(pe->metric_group ?: "No_group");
+ mg = strdup(pm->metric_group ?: "No_group");
if (!mg)
return -ENOMEM;
omg = mg;
@@ -435,15 +435,15 @@ static int metricgroup__add_to_mep_groups(const struct pmu_event *pe,
g = skip_spaces(g);
if (strlen(g))
- me = mep_lookup(groups, g, pe->metric_name);
+ me = mep_lookup(groups, g, pm->metric_name);
else
- me = mep_lookup(groups, "No_group", pe->metric_name);
+ me = mep_lookup(groups, "No_group", pm->metric_name);
if (me) {
- me->metric_desc = pe->desc;
- me->metric_long_desc = pe->long_desc;
- me->metric_expr = pe->metric_expr;
- me->metric_unit = pe->unit;
+ me->metric_desc = pm->desc;
+ me->metric_long_desc = pm->long_desc;
+ me->metric_expr = pm->metric_expr;
+ me->metric_unit = pm->unit;
}
}
free(omg);
@@ -452,64 +452,61 @@ static int metricgroup__add_to_mep_groups(const struct pmu_event *pe,
}
struct metricgroup_iter_data {
- pmu_event_iter_fn fn;
+ pmu_metric_iter_fn fn;
void *data;
};
-static int metricgroup__sys_event_iter(const struct pmu_event *pe,
- const struct pmu_events_table *table,
+static int metricgroup__sys_event_iter(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table,
void *data)
{
struct metricgroup_iter_data *d = data;
struct perf_pmu *pmu = NULL;
- if (!pe->metric_expr || !pe->compat)
+ if (!pm->metric_expr || !pm->compat)
return 0;
while ((pmu = perf_pmu__scan(pmu))) {
- if (!pmu->id || strcmp(pmu->id, pe->compat))
+ if (!pmu->id || strcmp(pmu->id, pm->compat))
continue;
- return d->fn(pe, table, d->data);
+ return d->fn(pm, table, d->data);
}
return 0;
}
-static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe,
- const struct pmu_events_table *table __maybe_unused,
- void *vdata)
+static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
{
struct rblist *groups = vdata;
- if (!pe->metric_name)
- return 0;
-
- return metricgroup__add_to_mep_groups(pe, groups);
+ return metricgroup__add_to_mep_groups(pm, groups);
}
void metricgroup__print(const struct print_callbacks *print_cb, void *print_state)
{
struct rblist groups;
- const struct pmu_events_table *table;
+ const struct pmu_metrics_table *table;
struct rb_node *node, *next;
rblist__init(&groups);
groups.node_new = mep_new;
groups.node_cmp = mep_cmp;
groups.node_delete = mep_delete;
- table = pmu_events_table__find();
+ table = pmu_metrics_table__find();
if (table) {
- pmu_events_table_for_each_event(table,
- metricgroup__add_to_mep_groups_callback,
- &groups);
+ pmu_metrics_table_for_each_metric(table,
+ metricgroup__add_to_mep_groups_callback,
+ &groups);
}
{
struct metricgroup_iter_data data = {
.fn = metricgroup__add_to_mep_groups_callback,
.data = &groups,
};
- pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+ pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data);
}
for (node = rb_first_cached(&groups.entries); node; node = next) {
@@ -743,7 +740,7 @@ static int metricgroup__build_event_string(struct strbuf *events,
#undef RETURN_IF_NON_ZERO
}
-int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused)
+int __weak arch_get_runtimeparam(const struct pmu_metric *pm __maybe_unused)
{
return 1;
}
@@ -768,22 +765,22 @@ struct metricgroup_add_iter_data {
bool system_wide;
struct metric *root_metric;
const struct visited_metric *visited;
- const struct pmu_events_table *table;
+ const struct pmu_metrics_table *table;
};
static bool metricgroup__find_metric(const char *metric,
- const struct pmu_events_table *table,
- struct pmu_event *pe);
+ const struct pmu_metrics_table *table,
+ struct pmu_metric *pm);
static int add_metric(struct list_head *metric_list,
- const struct pmu_event *pe,
+ const struct pmu_metric *pm,
const char *modifier,
bool metric_no_group,
const char *user_requested_cpu_list,
bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
- const struct pmu_events_table *table);
+ const struct pmu_metrics_table *table);
/**
* resolve_metric - Locate metrics within the root metric and recursively add
@@ -810,16 +807,16 @@ static int resolve_metric(struct list_head *metric_list,
bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
- const struct pmu_events_table *table)
+ const struct pmu_metrics_table *table)
{
struct hashmap_entry *cur;
size_t bkt;
struct to_resolve {
/* The metric to resolve. */
- struct pmu_event pe;
+ struct pmu_metric pm;
/*
* The key in the IDs map, this may differ from in case,
- * etc. from pe->metric_name.
+ * etc. from pm->metric_name.
*/
const char *key;
} *pending = NULL;
@@ -830,15 +827,15 @@ static int resolve_metric(struct list_head *metric_list,
* the pending array.
*/
hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
- struct pmu_event pe;
+ struct pmu_metric pm;
- if (metricgroup__find_metric(cur->pkey, table, &pe)) {
+ if (metricgroup__find_metric(cur->pkey, table, &pm)) {
pending = realloc(pending,
(pending_cnt + 1) * sizeof(struct to_resolve));
if (!pending)
return -ENOMEM;
- memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe));
+ memcpy(&pending[pending_cnt].pm, &pm, sizeof(pm));
pending[pending_cnt].key = cur->pkey;
pending_cnt++;
}
@@ -853,7 +850,7 @@ static int resolve_metric(struct list_head *metric_list,
* context.
*/
for (i = 0; i < pending_cnt; i++) {
- ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group,
+ ret = add_metric(metric_list, &pending[i].pm, modifier, metric_no_group,
user_requested_cpu_list, system_wide, root_metric, visited,
table);
if (ret)
@@ -867,7 +864,7 @@ static int resolve_metric(struct list_head *metric_list,
/**
* __add_metric - Add a metric to metric_list.
* @metric_list: The list the metric is added to.
- * @pe: The pmu_event containing the metric to be added.
+ * @pm: The pmu_metric containing the metric to be added.
* @modifier: if non-null event modifiers like "u".
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
@@ -884,7 +881,7 @@ static int resolve_metric(struct list_head *metric_list,
* architecture perf is running upon.
*/
static int __add_metric(struct list_head *metric_list,
- const struct pmu_event *pe,
+ const struct pmu_metric *pm,
const char *modifier,
bool metric_no_group,
int runtime,
@@ -892,19 +889,19 @@ static int __add_metric(struct list_head *metric_list,
bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
- const struct pmu_events_table *table)
+ const struct pmu_metrics_table *table)
{
const struct visited_metric *vm;
int ret;
bool is_root = !root_metric;
struct visited_metric visited_node = {
- .name = pe->metric_name,
+ .name = pm->metric_name,
.parent = visited,
};
for (vm = visited; vm; vm = vm->parent) {
- if (!strcmp(pe->metric_name, vm->name)) {
- pr_err("failed: recursion detected for %s\n", pe->metric_name);
+ if (!strcmp(pm->metric_name, vm->name)) {
+ pr_err("failed: recursion detected for %s\n", pm->metric_name);
return -1;
}
}
@@ -914,7 +911,7 @@ static int __add_metric(struct list_head *metric_list,
* This metric is the root of a tree and may reference other
* metrics that are added recursively.
*/
- root_metric = metric__new(pe, modifier, metric_no_group, runtime,
+ root_metric = metric__new(pm, modifier, metric_no_group, runtime,
user_requested_cpu_list, system_wide);
if (!root_metric)
return -ENOMEM;
@@ -929,7 +926,7 @@ static int __add_metric(struct list_head *metric_list,
*/
if (root_metric->metric_refs) {
for (; root_metric->metric_refs[cnt].metric_name; cnt++) {
- if (!strcmp(pe->metric_name,
+ if (!strcmp(pm->metric_name,
root_metric->metric_refs[cnt].metric_name))
return 0;
}
@@ -947,8 +944,8 @@ static int __add_metric(struct list_head *metric_list,
* need to change them, so there's no need to create
* our own copy.
*/
- root_metric->metric_refs[cnt].metric_name = pe->metric_name;
- root_metric->metric_refs[cnt].metric_expr = pe->metric_expr;
+ root_metric->metric_refs[cnt].metric_name = pm->metric_name;
+ root_metric->metric_refs[cnt].metric_expr = pm->metric_expr;
/* Null terminate array. */
root_metric->metric_refs[cnt+1].metric_name = NULL;
@@ -959,7 +956,7 @@ static int __add_metric(struct list_head *metric_list,
* For both the parent and referenced metrics, we parse
* all the metric's IDs and add it to the root context.
*/
- if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) {
+ if (expr__find_ids(pm->metric_expr, NULL, root_metric->pctx) < 0) {
/* Broken metric. */
ret = -EINVAL;
} else {
@@ -981,57 +978,57 @@ static int __add_metric(struct list_head *metric_list,
struct metricgroup__find_metric_data {
const char *metric;
- struct pmu_event *pe;
+ struct pmu_metric *pm;
};
-static int metricgroup__find_metric_callback(const struct pmu_event *pe,
- const struct pmu_events_table *table __maybe_unused,
+static int metricgroup__find_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
void *vdata)
{
struct metricgroup__find_metric_data *data = vdata;
- if (!match_metric(pe->metric_name, data->metric))
+ if (!match_metric(pm->metric_name, data->metric))
return 0;
- memcpy(data->pe, pe, sizeof(*pe));
+ memcpy(data->pm, pm, sizeof(*pm));
return 1;
}
static bool metricgroup__find_metric(const char *metric,
- const struct pmu_events_table *table,
- struct pmu_event *pe)
+ const struct pmu_metrics_table *table,
+ struct pmu_metric *pm)
{
struct metricgroup__find_metric_data data = {
.metric = metric,
- .pe = pe,
+ .pm = pm,
};
- return pmu_events_table_for_each_event(table, metricgroup__find_metric_callback, &data)
+ return pmu_metrics_table_for_each_metric(table, metricgroup__find_metric_callback, &data)
? true : false;
}
static int add_metric(struct list_head *metric_list,
- const struct pmu_event *pe,
+ const struct pmu_metric *pm,
const char *modifier,
bool metric_no_group,
const char *user_requested_cpu_list,
bool system_wide,
struct metric *root_metric,
const struct visited_metric *visited,
- const struct pmu_events_table *table)
+ const struct pmu_metrics_table *table)
{
int ret = 0;
- pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+ pr_debug("metric expr %s for %s\n", pm->metric_expr, pm->metric_name);
- if (!strstr(pe->metric_expr, "?")) {
- ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0,
+ if (!strstr(pm->metric_expr, "?")) {
+ ret = __add_metric(metric_list, pm, modifier, metric_no_group, 0,
user_requested_cpu_list, system_wide, root_metric,
visited, table);
} else {
int j, count;
- count = arch_get_runtimeparam(pe);
+ count = arch_get_runtimeparam(pm);
/* This loop is added to create multiple
* events depend on count value and add
@@ -1039,7 +1036,7 @@ static int add_metric(struct list_head *metric_list,
*/
for (j = 0; j < count && !ret; j++)
- ret = __add_metric(metric_list, pe, modifier, metric_no_group, j,
+ ret = __add_metric(metric_list, pm, modifier, metric_no_group, j,
user_requested_cpu_list, system_wide,
root_metric, visited, table);
}
@@ -1047,17 +1044,17 @@ static int add_metric(struct list_head *metric_list,
return ret;
}
-static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
- const struct pmu_events_table *table __maybe_unused,
- void *data)
+static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *data)
{
struct metricgroup_add_iter_data *d = data;
int ret;
- if (!match_pe_metric(pe, d->metric_name))
+ if (!match_pm_metric(pm, d->metric_name))
return 0;
- ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group,
+ ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group,
d->user_requested_cpu_list, d->system_wide,
d->root_metric, d->visited, d->table);
if (ret)
@@ -1107,19 +1104,19 @@ struct metricgroup__add_metric_data {
bool has_match;
};
-static int metricgroup__add_metric_callback(const struct pmu_event *pe,
- const struct pmu_events_table *table,
+static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table,
void *vdata)
{
struct metricgroup__add_metric_data *data = vdata;
int ret = 0;
- if (pe->metric_expr &&
- (match_metric(pe->metric_group, data->metric_name) ||
- match_metric(pe->metric_name, data->metric_name))) {
+ if (pm->metric_expr &&
+ (match_metric(pm->metric_group, data->metric_name) ||
+ match_metric(pm->metric_name, data->metric_name))) {
data->has_match = true;
- ret = add_metric(data->list, pe, data->modifier, data->metric_no_group,
+ ret = add_metric(data->list, pm, data->modifier, data->metric_no_group,
data->user_requested_cpu_list, data->system_wide,
/*root_metric=*/NULL, /*visited_metrics=*/NULL, table);
}
@@ -1146,7 +1143,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
const char *user_requested_cpu_list,
bool system_wide,
struct list_head *metric_list,
- const struct pmu_events_table *table)
+ const struct pmu_metrics_table *table)
{
LIST_HEAD(list);
int ret;
@@ -1166,8 +1163,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
* Iterate over all metrics seeing if metric matches either the
* name or group. When it does add the metric to the list.
*/
- ret = pmu_events_table_for_each_event(table, metricgroup__add_metric_callback,
- &data);
+ ret = pmu_metrics_table_for_each_metric(table, metricgroup__add_metric_callback,
+ &data);
if (ret)
goto out;
@@ -1189,7 +1186,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
},
};
- pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+ pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data);
}
/* End of pmu events. */
if (!has_match)
@@ -1222,7 +1219,7 @@ out:
static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
const char *user_requested_cpu_list,
bool system_wide, struct list_head *metric_list,
- const struct pmu_events_table *table)
+ const struct pmu_metrics_table *table)
{
char *list_itr, *list_copy, *metric_name, *modifier;
int ret, count = 0;
@@ -1432,7 +1429,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
bool system_wide,
struct perf_pmu *fake_pmu,
struct rblist *metric_events_list,
- const struct pmu_events_table *table)
+ const struct pmu_metrics_table *table)
{
struct evlist *combined_evlist = NULL;
LIST_HEAD(metric_list);
@@ -1580,7 +1577,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool system_wide,
struct rblist *metric_events)
{
- const struct pmu_events_table *table = pmu_events_table__find();
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
if (!table)
return -EINVAL;
@@ -1591,7 +1588,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
}
int metricgroup__parse_groups_test(struct evlist *evlist,
- const struct pmu_events_table *table,
+ const struct pmu_metrics_table *table,
const char *str,
bool metric_no_group,
bool metric_no_merge,
@@ -1603,16 +1600,16 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
&perf_pmu__fake, metric_events, table);
}
-static int metricgroup__has_metric_callback(const struct pmu_event *pe,
- const struct pmu_events_table *table __maybe_unused,
+static int metricgroup__has_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
void *vdata)
{
const char *metric = vdata;
- if (!pe->metric_expr)
+ if (!pm->metric_expr)
return 0;
- if (match_metric(pe->metric_name, metric))
+ if (match_metric(pm->metric_name, metric))
return 1;
return 0;
@@ -1620,13 +1617,13 @@ static int metricgroup__has_metric_callback(const struct pmu_event *pe,
bool metricgroup__has_metric(const char *metric)
{
- const struct pmu_events_table *table = pmu_events_table__find();
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
if (!table)
return false;
- return pmu_events_table_for_each_event(table, metricgroup__has_metric_callback,
- (void *)metric) ? true : false;
+ return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback,
+ (void *)metric) ? true : false;
}
int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 0013cf582173..84030321a057 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -73,7 +73,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool system_wide,
struct rblist *metric_events);
int metricgroup__parse_groups_test(struct evlist *evlist,
- const struct pmu_events_table *table,
+ const struct pmu_metrics_table *table,
const char *str,
bool metric_no_group,
bool metric_no_merge,
@@ -81,7 +81,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
bool metricgroup__has_metric(const char *metric);
-int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused);
+int arch_get_runtimeparam(const struct pmu_metric *pm);
void metricgroup__rblist_exit(struct rblist *metric_events);
int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 21cce83462b3..0336ff27c15f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1570,8 +1570,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->scale = info.scale;
evsel->per_pkg = info.per_pkg;
evsel->snapshot = info.snapshot;
- evsel->metric_expr = info.metric_expr;
- evsel->metric_name = info.metric_name;
return 0;
}
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index ac3227ba769c..b59ba825ddc9 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -193,8 +193,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
/*scale_unit=*/NULL,
/*deprecated=*/NULL, "PFM event",
info->desc, /*long_desc=*/NULL,
- /*encoding_desc=*/buf->buf,
- /*metric_name=*/NULL, /*metric_expr=*/NULL);
+ /*encoding_desc=*/buf->buf);
pfm_for_each_event_attr(j, info) {
pfm_event_attr_info_t ainfo;
@@ -224,8 +223,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
/*scale_unit=*/NULL,
/*deprecated=*/NULL, "PFM event",
ainfo.desc, /*long_desc=*/NULL,
- /*encoding_desc=*/buf->buf,
- /*metric_name=*/NULL, /*metric_expr=*/NULL);
+ /*encoding_desc=*/buf->buf);
}
}
}
diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c
index f51ccaac60ee..38628805a952 100644
--- a/tools/perf/util/pmu-hybrid.c
+++ b/tools/perf/util/pmu-hybrid.c
@@ -20,32 +20,15 @@ LIST_HEAD(perf_pmu__hybrid_pmus);
bool perf_pmu__hybrid_mounted(const char *name)
{
- char path[PATH_MAX];
- const char *sysfs;
- FILE *file;
- int n, cpu;
+ int cpu;
+ char pmu_name[PATH_MAX];
+ struct perf_pmu pmu = {.name = pmu_name};
if (strncmp(name, "cpu_", 4))
return false;
- sysfs = sysfs__mountpoint();
- if (!sysfs)
- return false;
-
- snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name);
- if (!file_available(path))
- return false;
-
- file = fopen(path, "r");
- if (!file)
- return false;
-
- n = fscanf(file, "%u", &cpu);
- fclose(file);
- if (n <= 0)
- return false;
-
- return true;
+ strlcpy(pmu_name, name, sizeof(pmu_name));
+ return perf_pmu__scan_file(&pmu, "cpus", "%u", &cpu) > 0;
}
struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 2bdeb89352e7..c256b29defad 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -19,6 +19,7 @@
#include <regex.h>
#include <perf/cpumap.h>
#include <fnmatch.h>
+#include <math.h>
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
@@ -107,14 +108,10 @@ int perf_pmu__format_parse(char *dir, struct list_head *head)
static int pmu_format(const char *name, struct list_head *format)
{
char path[PATH_MAX];
- const char *sysfs = sysfs__mountpoint();
- if (!sysfs)
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "format"))
return -1;
- snprintf(path, PATH_MAX,
- "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);
-
if (!file_available(path))
return 0;
@@ -283,10 +280,6 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old,
perf_pmu_assign_str(old->name, "long_desc", &old->long_desc,
&newalias->long_desc);
perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic);
- perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr,
- &newalias->metric_expr);
- perf_pmu_assign_str(old->name, "metric_name", &old->metric_name,
- &newalias->metric_name);
perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str);
old->scale = newalias->scale;
old->per_pkg = newalias->per_pkg;
@@ -302,8 +295,6 @@ void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
zfree(&newalias->long_desc);
zfree(&newalias->topic);
zfree(&newalias->str);
- zfree(&newalias->metric_expr);
- zfree(&newalias->metric_name);
zfree(&newalias->pmu_name);
parse_events_terms__purge(&newalias->terms);
free(newalias);
@@ -340,16 +331,13 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
int num;
char newval[256];
char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL,
- *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL,
- *pmu_name = NULL;
+ *deprecated = NULL, *pmu_name = NULL;
if (pe) {
long_desc = (char *)pe->long_desc;
topic = (char *)pe->topic;
unit = (char *)pe->unit;
perpkg = (char *)pe->perpkg;
- metric_expr = (char *)pe->metric_expr;
- metric_name = (char *)pe->metric_name;
deprecated = (char *)pe->deprecated;
pmu_name = (char *)pe->pmu;
}
@@ -404,8 +392,6 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
perf_pmu__parse_snapshot(alias, dir, name);
}
- alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL;
- alias->metric_name = metric_name ? strdup(metric_name): NULL;
alias->desc = desc ? strdup(desc) : NULL;
alias->long_desc = long_desc ? strdup(long_desc) :
desc ? strdup(desc) : NULL;
@@ -513,14 +499,10 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
static int pmu_aliases(const char *name, struct list_head *head)
{
char path[PATH_MAX];
- const char *sysfs = sysfs__mountpoint();
- if (!sysfs)
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "events"))
return -1;
- snprintf(path, PATH_MAX,
- "%s/bus/event_source/devices/%s/events", sysfs, name);
-
if (!file_available(path))
return 0;
@@ -554,52 +536,16 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
return 0;
}
-/*
- * Reading/parsing the default pmu type value, which should be
- * located at:
- * /sys/bus/event_source/devices/<dev>/type as sysfs attribute.
- */
-static int pmu_type(const char *name, __u32 *type)
-{
- char path[PATH_MAX];
- FILE *file;
- int ret = 0;
- const char *sysfs = sysfs__mountpoint();
-
- if (!sysfs)
- return -1;
-
- snprintf(path, PATH_MAX,
- "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);
-
- if (access(path, R_OK) < 0)
- return -1;
-
- file = fopen(path, "r");
- if (!file)
- return -EINVAL;
-
- if (1 != fscanf(file, "%u", type))
- ret = -1;
-
- fclose(file);
- return ret;
-}
-
/* Add all pmus in sysfs to pmu list: */
static void pmu_read_sysfs(void)
{
char path[PATH_MAX];
DIR *dir;
struct dirent *dent;
- const char *sysfs = sysfs__mountpoint();
- if (!sysfs)
+ if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path)))
return;
- snprintf(path, PATH_MAX,
- "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
-
dir = opendir(path);
if (!dir)
return;
@@ -614,45 +560,29 @@ static void pmu_read_sysfs(void)
closedir(dir);
}
-static struct perf_cpu_map *__pmu_cpumask(const char *path)
-{
- FILE *file;
- struct perf_cpu_map *cpus;
-
- file = fopen(path, "r");
- if (!file)
- return NULL;
-
- cpus = perf_cpu_map__read(file);
- fclose(file);
- return cpus;
-}
-
/*
* Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
* may have a "cpus" file.
*/
-#define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier"
-#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask"
-
static struct perf_cpu_map *pmu_cpumask(const char *name)
{
- char path[PATH_MAX];
struct perf_cpu_map *cpus;
- const char *sysfs = sysfs__mountpoint();
const char *templates[] = {
- CPUS_TEMPLATE_UNCORE,
- CPUS_TEMPLATE_CPU,
+ "cpumask",
+ "cpus",
NULL
};
const char **template;
+ char pmu_name[PATH_MAX];
+ struct perf_pmu pmu = {.name = pmu_name};
+ FILE *file;
- if (!sysfs)
- return NULL;
-
+ strlcpy(pmu_name, name, sizeof(pmu_name));
for (template = templates; *template; template++) {
- snprintf(path, PATH_MAX, *template, sysfs, name);
- cpus = __pmu_cpumask(path);
+ file = perf_pmu__open_file(&pmu, *template);
+ if (!file)
+ continue;
+ cpus = perf_cpu_map__read(file);
if (cpus)
return cpus;
}
@@ -663,13 +593,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name)
static bool pmu_is_uncore(const char *name)
{
char path[PATH_MAX];
- const char *sysfs;
if (perf_pmu__hybrid_mounted(name))
return false;
- sysfs = sysfs__mountpoint();
- snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
+ perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpumask");
return file_available(path);
}
@@ -678,9 +606,9 @@ static char *pmu_id(const char *name)
char path[PATH_MAX], *str;
size_t len;
- snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name);
+ perf_pmu__pathname_scnprintf(path, sizeof(path), name, "identifier");
- if (sysfs__read_str(path, &str, &len) < 0)
+ if (filename__read_str(path, &str, &len) < 0)
return NULL;
str[len - 1] = 0; /* remove line feed */
@@ -696,14 +624,9 @@ static char *pmu_id(const char *name)
static int is_arm_pmu_core(const char *name)
{
char path[PATH_MAX];
- const char *sysfs = sysfs__mountpoint();
- if (!sysfs)
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpus"))
return 0;
-
- /* Look for cpu sysfs (specific to arm) */
- scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
- sysfs, name);
return file_available(path);
}
@@ -729,7 +652,12 @@ char *perf_pmu__getcpuid(struct perf_pmu *pmu)
__weak const struct pmu_events_table *pmu_events_table__find(void)
{
- return perf_pmu__find_table(NULL);
+ return perf_pmu__find_events_table(NULL);
+}
+
+__weak const struct pmu_metrics_table *pmu_metrics_table__find(void)
+{
+ return perf_pmu__find_metrics_table(NULL);
}
/*
@@ -822,9 +750,6 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
struct pmu_add_cpu_aliases_map_data *data = vdata;
const char *pname = pe->pmu ? pe->pmu : data->cpu_name;
- if (!pe->name)
- return 0;
-
if (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->name))
goto new_alias;
@@ -860,7 +785,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
const struct pmu_events_table *table;
- table = perf_pmu__find_table(pmu);
+ table = perf_pmu__find_events_table(pmu);
if (!table)
return;
@@ -879,12 +804,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
struct pmu_sys_event_iter_data *idata = data;
struct perf_pmu *pmu = idata->pmu;
- if (!pe->name) {
- if (pe->metric_group || pe->metric_name)
- return 0;
- return -EINVAL;
- }
-
if (!pe->compat || !pe->pmu)
return 0;
@@ -931,16 +850,11 @@ pmu_find_alias_name(const char *name __maybe_unused)
return NULL;
}
-static int pmu_max_precise(const char *name)
+static int pmu_max_precise(struct perf_pmu *pmu)
{
- char path[PATH_MAX];
int max_precise = -1;
- scnprintf(path, PATH_MAX,
- "bus/event_source/devices/%s/caps/max_precise",
- name);
-
- sysfs__read_int(path, &max_precise);
+ perf_pmu__scan_file(pmu, "caps/max_precise", "%d", &max_precise);
return max_precise;
}
@@ -969,11 +883,8 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
return NULL;
/*
- * Check the type first to avoid unnecessary work.
+ * Check the aliases first to avoid unnecessary work.
*/
- if (pmu_type(name, &type))
- return NULL;
-
if (pmu_aliases(name, &aliases))
return NULL;
@@ -983,9 +894,14 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
pmu->cpus = pmu_cpumask(name);
pmu->name = strdup(name);
+
if (!pmu->name)
goto err;
+ /* Read type, and ensure that type value is successfully assigned (return 1) */
+ if (perf_pmu__scan_file(pmu, "type", "%u", &type) != 1)
+ goto err;
+
alias_name = pmu_find_alias_name(name);
if (alias_name) {
pmu->alias_name = strdup(alias_name);
@@ -997,7 +913,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
pmu->is_uncore = pmu_is_uncore(name);
if (pmu->is_uncore)
pmu->id = pmu_id(name);
- pmu->max_precise = pmu_max_precise(name);
+ pmu->max_precise = pmu_max_precise(pmu);
pmu_add_cpu_aliases(&aliases, pmu);
pmu_add_sys_aliases(&aliases, pmu);
@@ -1469,8 +1385,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
info->unit = NULL;
info->scale = 0.0;
info->snapshot = false;
- info->metric_expr = NULL;
- info->metric_name = NULL;
list_for_each_entry_safe(term, h, head_terms, list) {
alias = pmu_find_alias(pmu, term);
@@ -1486,8 +1400,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
if (alias->per_pkg)
info->per_pkg = true;
- info->metric_expr = alias->metric_expr;
- info->metric_name = alias->metric_name;
list_del_init(&term->list);
parse_events_term__delete(term);
@@ -1703,8 +1615,7 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
for (j = 0; j < len; j++) {
const char *name, *alias = NULL, *scale_unit = NULL,
*desc = NULL, *long_desc = NULL,
- *encoding_desc = NULL, *topic = NULL,
- *metric_name = NULL, *metric_expr = NULL;
+ *encoding_desc = NULL, *topic = NULL;
bool deprecated = false;
size_t buf_used;
@@ -1742,8 +1653,6 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
"%s/%s/", aliases[j].pmu->name,
aliases[j].event->str) + 1;
- metric_name = aliases[j].event->metric_name;
- metric_expr = aliases[j].event->metric_expr;
deprecated = aliases[j].event->deprecated;
}
print_cb->print_event(print_state,
@@ -1756,9 +1665,7 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
"Kernel PMU event",
desc,
long_desc,
- encoding_desc,
- metric_name,
- metric_expr);
+ encoding_desc);
}
if (printed && pager_in_use())
printf("\n");
@@ -1783,19 +1690,14 @@ bool pmu_have_event(const char *pname, const char *name)
return false;
}
-static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
+FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
{
char path[PATH_MAX];
- const char *sysfs;
- sysfs = sysfs__mountpoint();
- if (!sysfs)
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name) ||
+ !file_available(path))
return NULL;
- snprintf(path, PATH_MAX,
- "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name);
- if (!file_available(path))
- return NULL;
return fopen(path, "r");
}
@@ -1816,6 +1718,16 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
return ret;
}
+bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name)
+{
+ char path[PATH_MAX];
+
+ if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name))
+ return false;
+
+ return file_available(path);
+}
+
static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
{
struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
@@ -1849,7 +1761,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
{
struct stat st;
char caps_path[PATH_MAX];
- const char *sysfs = sysfs__mountpoint();
DIR *caps_dir;
struct dirent *evt_ent;
@@ -1858,12 +1769,9 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
pmu->nr_caps = 0;
- if (!sysfs)
+ if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps"))
return -1;
- snprintf(caps_path, PATH_MAX,
- "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name);
-
if (stat(caps_path, &st) < 0) {
pmu->caps_initialized = true;
return 0; /* no error if caps does not exist */
@@ -1993,3 +1901,36 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
*ucpus_ptr = unmatched_cpus;
return 0;
}
+
+double __weak perf_pmu__cpu_slots_per_cycle(void)
+{
+ return NAN;
+}
+
+int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size)
+{
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return 0;
+ return scnprintf(pathname, size, "%s/bus/event_source/devices/", sysfs);
+}
+
+/*
+ * Fill 'buf' with the path to a file or folder in 'pmu_name' in
+ * sysfs. For example if pmu_name = "cs_etm" and 'filename' = "format"
+ * then pathname will be filled with
+ * "/sys/bus/event_source/devices/cs_etm/format"
+ *
+ * Return 0 if the sysfs mountpoint couldn't be found or if no
+ * characters were written.
+ */
+int perf_pmu__pathname_scnprintf(char *buf, size_t size,
+ const char *pmu_name, const char *filename)
+{
+ char base_path[PATH_MAX];
+
+ if (!perf_pmu__event_source_devices_scnprintf(base_path, sizeof(base_path)))
+ return 0;
+ return scnprintf(buf, size, "%s%s/%s", base_path, pmu_name, filename);
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 69ca0004f94f..6b770f17eb86 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -7,6 +7,7 @@
#include <linux/perf_event.h>
#include <linux/list.h>
#include <stdbool.h>
+#include <stdio.h>
#include "parse-events.h"
#include "pmu-events/pmu-events.h"
@@ -22,8 +23,6 @@ enum {
};
#define PERF_PMU_FORMAT_BITS 64
-#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
-#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus"
#define MAX_PMU_NAME_LEN 128
struct perf_event_attr;
@@ -133,8 +132,6 @@ extern struct perf_pmu perf_pmu__fake;
struct perf_pmu_info {
const char *unit;
- const char *metric_expr;
- const char *metric_name;
double scale;
bool per_pkg;
bool snapshot;
@@ -188,13 +185,6 @@ struct perf_pmu_alias {
* default.
*/
bool deprecated;
- /**
- * @metric_expr: A metric expression associated with an event. Doing
- * this makes little sense due to scale and unit applying to both.
- */
- char *metric_expr;
- /** @metric_name: A name for the metric. unit applying to both. */
- char *metric_name;
/** @pmu_name: The name copied from struct perf_pmu. */
char *pmu_name;
};
@@ -231,6 +221,8 @@ bool pmu_have_event(const char *pname, const char *name);
int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
+bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
+
int perf_pmu__test(void);
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
@@ -239,6 +231,7 @@ void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu,
char *perf_pmu__getcpuid(struct perf_pmu *pmu);
const struct pmu_events_table *pmu_events_table__find(void);
+const struct pmu_metrics_table *pmu_metrics_table__find(void);
bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
void perf_pmu_free_alias(struct perf_pmu_alias *alias);
@@ -259,4 +252,10 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
char *pmu_find_real_name(const char *name);
char *pmu_find_alias_name(const char *name);
+double perf_pmu__cpu_slots_per_cycle(void);
+int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
+int perf_pmu__pathname_scnprintf(char *buf, size_t size,
+ const char *pmu_name, const char *filename);
+FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 2646ae18d9f9..62e9ea7dcf40 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -101,9 +101,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb, void *print
"Tracepoint event",
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
}
free(dir_path);
free(evt_namelist);
@@ -195,9 +193,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
"SDT event",
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
free(evt_name);
}
@@ -255,9 +251,7 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta
event_type_descriptors[PERF_TYPE_HW_CACHE],
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
}
strlist__delete(evt_name_list);
return 0;
@@ -277,9 +271,7 @@ void print_tool_events(const struct print_callbacks *print_cb, void *print_state
"Tool event",
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
}
}
@@ -331,9 +323,7 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta
event_type_descriptors[type],
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
}
strlist__delete(evt_name_list);
}
@@ -364,9 +354,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
event_type_descriptors[PERF_TYPE_RAW],
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
print_cb->print_event(print_state,
/*topic=*/NULL,
@@ -378,9 +366,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
event_type_descriptors[PERF_TYPE_RAW],
"(see 'man perf-list' on how to encode it)",
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
print_cb->print_event(print_state,
/*topic=*/NULL,
@@ -392,9 +378,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
event_type_descriptors[PERF_TYPE_BREAKPOINT],
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL,
- /*metric_name=*/NULL,
- /*metric_expr=*/NULL);
+ /*encoding_desc=*/NULL);
print_tracepoint_events(print_cb, print_state);
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index c237e53c4487..716dcf4b4859 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -16,8 +16,7 @@ struct print_callbacks {
const char *scale_unit,
bool deprecated, const char *event_type_desc,
const char *desc, const char *long_desc,
- const char *encoding_desc,
- const char *metric_name, const char *metric_expr);
+ const char *encoding_desc);
void (*print_metric)(void *print_state,
const char *group,
const char *name,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 0c24bc7afbca..881d94f65a6b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -917,7 +917,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf);
if (!dinfo) {
if (need_dwarf)
- return -ENOENT;
+ return -ENODATA;
pr_debug("Could not open debuginfo. Try to use symbols.\n");
return 0;
}
@@ -956,7 +956,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
if (ntevs == 0) { /* No error but failed to find probe point. */
pr_warning("Probe point '%s' not found.\n",
synthesize_perf_probe_point(&pev->point));
- return -ENOENT;
+ return -ENODEV;
} else if (ntevs < 0) {
/* Error path : ntevs < 0 */
pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 212031b97910..42e8b813d010 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -20,6 +20,7 @@
#include "stat.h"
#include "metricgroup.h"
#include "util/env.h"
+#include "util/pmu.h"
#include <internal/lib.h>
#include "util.h"
@@ -76,14 +77,7 @@ const char *perf_env__arch(struct perf_env *env __maybe_unused)
}
/*
- * Add this one here not to drag util/stat-shadow.c
- */
-void perf_stat__collect_metric_expr(struct evlist *evsel_list)
-{
-}
-
-/*
- * This one is needed not to drag the PMU bandwagon, jevents generated
+ * These ones are needed not to drag the PMU bandwagon, jevents generated
* pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for
* doing per PMU perf_event_attr.exclude_guest handling, not really needed, so
* far, for the perf python binding known usecases, revisit if this become
@@ -94,6 +88,11 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused)
return NULL;
}
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...)
+{
+ return EOF;
+}
+
/*
* Add this one here not to drag util/metricgroup.c
*/
@@ -442,10 +441,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
offset = val;
len = offset >> 16;
offset &= 0xffff;
-#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
- if (field->flags & TEP_FIELD_IS_RELATIVE)
+ if (tep_field_is_relative(field->flags))
offset += field->offset + field->size;
-#endif
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 60ec79d4eea4..33b08e0ac746 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -92,7 +92,10 @@ struct perf_sample {
u8 cpumode;
u16 misc;
u16 ins_lat;
- u16 p_stage_cyc;
+ union {
+ u16 p_stage_cyc;
+ u16 retire_lat;
+ };
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index c097b7934fd4..83fd2fd0ba16 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -393,10 +393,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(data + field->offset);
offset &= 0xffff;
-#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
- if (field->flags & TEP_FIELD_IS_RELATIVE)
+ if (tep_field_is_relative(field->flags))
offset += field->offset + field->size;
-#endif
} else
offset = field->offset;
XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e930f5f1f36d..2c2697c5d025 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -994,10 +994,8 @@ static void python_process_tracepoint(struct perf_sample *sample,
offset = val;
len = offset >> 16;
offset &= 0xffff;
-#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
- if (field->flags & TEP_FIELD_IS_RELATIVE)
+ if (tep_field_is_relative(field->flags))
offset += field->offset + field->size;
-#endif
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7c021c6cedb9..749d5b5c135b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1180,7 +1180,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
struct branch_entry *e = &entries[i];
if (!callstack) {
- printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n",
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n",
i, e->from, e->to,
(unsigned short)e->flags.cycles,
e->flags.mispred ? "M" : " ",
@@ -1188,7 +1188,8 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved,
- get_branch_type(e));
+ get_branch_type(e),
+ e->flags.spec ? branch_spec_desc(e->flags.spec) : "");
} else {
if (i == 0) {
printf("..... %2"PRIu64": %016" PRIx64 "\n"
@@ -1699,8 +1700,13 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_AUXTRACE_INFO:
return tool->auxtrace_info(session, event);
case PERF_RECORD_AUXTRACE:
- /* setup for reading amidst mmap */
- lseek(fd, file_offset + event->header.size, SEEK_SET);
+ /*
+ * Setup for reading amidst mmap, but only when we
+ * are in 'file' mode. The 'pipe' fd is in proper
+ * place already.
+ */
+ if (!perf_data__is_pipe(session->data))
+ lseek(fd, file_offset + event->header.size, SEEK_SET);
return tool->auxtrace(session, event);
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 37662cdec5ee..093a0c8b2e3d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -28,6 +28,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -53,6 +54,13 @@ static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local
static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
/*
+ * Some architectures have Adjacent Cacheline Prefetch feature, which
+ * behaves like the cacheline size is doubled. Enable this flag to
+ * check things in double cacheline granularity.
+ */
+bool chk_double_cl;
+
+/*
* Replaces all occurrences of a char used with the:
*
* -t, --field-separator
@@ -1499,8 +1507,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
addr:
/* al_addr does all the right addr - start + offset calculations */
- l = cl_address(left->mem_info->daddr.al_addr);
- r = cl_address(right->mem_info->daddr.al_addr);
+ l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl);
+ r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl);
if (l > r) return -1;
if (l < r) return 1;
@@ -1519,7 +1527,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
if (he->mem_info) {
struct map *map = he->mem_info->daddr.ms.map;
- addr = cl_address(he->mem_info->daddr.al_addr);
+ addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl);
ms = &he->mem_info->daddr.ms;
/* print [s] for shared data mmaps */
@@ -2132,6 +2140,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
DIM(SORT_ADDR, "addr", sort_addr),
+ DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
};
#undef DIM
@@ -2667,10 +2677,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
tep_read_number_field(field, a->raw_data, &dyn);
offset = dyn & 0xffff;
size = (dyn >> 16) & 0xffff;
-#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
- if (field->flags & TEP_FIELD_IS_RELATIVE)
+ if (tep_field_is_relative(field->flags))
offset += field->offset + field->size;
-#endif
/* record max width for output */
if (size > hde->dynamic_len)
hde->dynamic_len = size;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 921715e6aec4..d79a100e5999 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -35,6 +35,7 @@ extern struct sort_entry sort_sym_from;
extern struct sort_entry sort_sym_to;
extern struct sort_entry sort_srcline;
extern const char default_mem_sort_order[];
+extern bool chk_double_cl;
struct res_sample {
u64 time;
@@ -237,6 +238,8 @@ enum sort_type {
SORT_LOCAL_PIPELINE_STAGE_CYC,
SORT_GLOBAL_PIPELINE_STAGE_CYC,
SORT_ADDR,
+ SORT_LOCAL_RETIRE_LAT,
+ SORT_GLOBAL_RETIRE_LAT,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 8bd8b0142630..1b5cb20efd23 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -787,6 +787,51 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun
uniquify_event_name(counter);
}
+/**
+ * should_skip_zero_count() - Check if the event should print 0 values.
+ * @config: The perf stat configuration (including aggregation mode).
+ * @counter: The evsel with its associated cpumap.
+ * @id: The aggregation id that is being queried.
+ *
+ * Due to mismatch between the event cpumap or thread-map and the
+ * aggregation mode, sometimes it'd iterate the counter with the map
+ * which does not contain any values.
+ *
+ * For example, uncore events have dedicated CPUs to manage them,
+ * result for other CPUs should be zero and skipped.
+ *
+ * Return: %true if the value should NOT be printed, %false if the value
+ * needs to be printed like "<not counted>" or "<not supported>".
+ */
+static bool should_skip_zero_counter(struct perf_stat_config *config,
+ struct evsel *counter,
+ const struct aggr_cpu_id *id)
+{
+ struct perf_cpu cpu;
+ int idx;
+
+ /*
+ * Skip value 0 when enabling --per-thread globally,
+ * otherwise it will have too many 0 output.
+ */
+ if (config->aggr_mode == AGGR_THREAD && config->system_wide)
+ return true;
+ /*
+ * Skip value 0 when it's an uncore event and the given aggr id
+ * does not belong to the PMU cpumask.
+ */
+ if (!counter->pmu || !counter->pmu->is_uncore)
+ return false;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) {
+ struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu);
+
+ if (aggr_cpu_id__equal(id, &own_id))
+ return false;
+ }
+ return true;
+}
+
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
struct outstate *os)
@@ -814,11 +859,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
ena = aggr->counts.ena;
run = aggr->counts.run;
- /*
- * Skip value 0 when enabling --per-thread globally, otherwise it will
- * have too many 0 output.
- */
- if (val == 0 && config->aggr_mode == AGGR_THREAD && config->system_wide)
+ if (val == 0 && should_skip_zero_counter(config, counter, &id))
return;
if (!metric_only) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index cadb2df23c87..806b32156459 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -311,7 +311,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
- } else if (counter->metric_leader) {
+ } else if (counter->metric_leader && !counter->merged_stat) {
v = saved_value_lookup(counter->metric_leader,
map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
@@ -346,114 +346,6 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
return color;
}
-static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
- const char *name)
-{
- struct evsel *c2;
-
- evlist__for_each_entry (evsel_list, c2) {
- if (!strcasecmp(c2->name, name) && !c2->collect_stat)
- return c2;
- }
- return NULL;
-}
-
-/* Mark MetricExpr target events and link events using them to them. */
-void perf_stat__collect_metric_expr(struct evlist *evsel_list)
-{
- struct evsel *counter, *leader, **metric_events, *oc;
- bool found;
- struct expr_parse_ctx *ctx;
- struct hashmap_entry *cur;
- size_t bkt;
- int i;
-
- ctx = expr__ctx_new();
- if (!ctx) {
- pr_debug("expr__ctx_new failed");
- return;
- }
- evlist__for_each_entry(evsel_list, counter) {
- bool invalid = false;
-
- leader = evsel__leader(counter);
- if (!counter->metric_expr)
- continue;
-
- expr__ctx_clear(ctx);
- metric_events = counter->metric_events;
- if (!metric_events) {
- if (expr__find_ids(counter->metric_expr,
- counter->name,
- ctx) < 0)
- continue;
-
- metric_events = calloc(sizeof(struct evsel *),
- hashmap__size(ctx->ids) + 1);
- if (!metric_events) {
- expr__ctx_free(ctx);
- return;
- }
- counter->metric_events = metric_events;
- }
-
- i = 0;
- hashmap__for_each_entry(ctx->ids, cur, bkt) {
- const char *metric_name = cur->pkey;
-
- found = false;
- if (leader) {
- /* Search in group */
- for_each_group_member (oc, leader) {
- if (!strcasecmp(oc->name,
- metric_name) &&
- !oc->collect_stat) {
- found = true;
- break;
- }
- }
- }
- if (!found) {
- /* Search ignoring groups */
- oc = perf_stat__find_event(evsel_list,
- metric_name);
- }
- if (!oc) {
- /* Deduping one is good enough to handle duplicated PMUs. */
- static char *printed;
-
- /*
- * Adding events automatically would be difficult, because
- * it would risk creating groups that are not schedulable.
- * perf stat doesn't understand all the scheduling constraints
- * of events. So we ask the user instead to add the missing
- * events.
- */
- if (!printed ||
- strcasecmp(printed, metric_name)) {
- fprintf(stderr,
- "Add %s event to groups to get metric expression for %s\n",
- metric_name,
- counter->name);
- free(printed);
- printed = strdup(metric_name);
- }
- invalid = true;
- continue;
- }
- metric_events[i++] = oc;
- oc->collect_stat = true;
- }
- metric_events[i] = NULL;
- if (invalid) {
- free(metric_events);
- counter->metric_events = NULL;
- counter->metric_expr = NULL;
- }
- }
- expr__ctx_free(ctx);
-}
-
static double runtime_stat_avg(struct runtime_stat *st,
enum stat_type type, int map_idx,
struct runtime_stat_data *rsd)
@@ -1299,10 +1191,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
color = NULL;
print_metric(config, ctxp, color, "%8.1f%%", "Core Bound",
core_bound * 100.);
- } else if (evsel->metric_expr) {
- generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
- evsel->name, evsel->metric_name, NULL, 1,
- map_idx, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) {
char unit = ' ';
char unit_buf[10] = "/sec";
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 499c3bf81333..b1c29156c560 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -257,7 +257,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st);
-void perf_stat__collect_metric_expr(struct evlist *);
int evlist__alloc_stats(struct perf_stat_config *config,
struct evlist *evlist, bool alloc_raw);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 96767d1b3f1c..41882ae8452e 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -323,15 +323,325 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
return demangled;
}
-#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
- for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
- idx < nr_entries; \
- ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+struct rel_info {
+ u32 nr_entries;
+ u32 *sorted;
+ bool is_rela;
+ Elf_Data *reldata;
+ GElf_Rela rela;
+ GElf_Rel rel;
+};
+
+static u32 get_rel_symidx(struct rel_info *ri, u32 idx)
+{
+ idx = ri->sorted ? ri->sorted[idx] : idx;
+ if (ri->is_rela) {
+ gelf_getrela(ri->reldata, idx, &ri->rela);
+ return GELF_R_SYM(ri->rela.r_info);
+ }
+ gelf_getrel(ri->reldata, idx, &ri->rel);
+ return GELF_R_SYM(ri->rel.r_info);
+}
-#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
- for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
- idx < nr_entries; \
- ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+static u64 get_rel_offset(struct rel_info *ri, u32 x)
+{
+ if (ri->is_rela) {
+ GElf_Rela rela;
+
+ gelf_getrela(ri->reldata, x, &rela);
+ return rela.r_offset;
+ } else {
+ GElf_Rel rel;
+
+ gelf_getrel(ri->reldata, x, &rel);
+ return rel.r_offset;
+ }
+}
+
+static int rel_cmp(const void *a, const void *b, void *r)
+{
+ struct rel_info *ri = r;
+ u64 a_offset = get_rel_offset(ri, *(const u32 *)a);
+ u64 b_offset = get_rel_offset(ri, *(const u32 *)b);
+
+ return a_offset < b_offset ? -1 : (a_offset > b_offset ? 1 : 0);
+}
+
+static int sort_rel(struct rel_info *ri)
+{
+ size_t sz = sizeof(ri->sorted[0]);
+ u32 i;
+
+ ri->sorted = calloc(ri->nr_entries, sz);
+ if (!ri->sorted)
+ return -1;
+ for (i = 0; i < ri->nr_entries; i++)
+ ri->sorted[i] = i;
+ qsort_r(ri->sorted, ri->nr_entries, sz, rel_cmp, ri);
+ return 0;
+}
+
+/*
+ * For x86_64, the GNU linker is putting IFUNC information in the relocation
+ * addend.
+ */
+static bool addend_may_be_ifunc(GElf_Ehdr *ehdr, struct rel_info *ri)
+{
+ return ehdr->e_machine == EM_X86_64 && ri->is_rela &&
+ GELF_R_TYPE(ri->rela.r_info) == R_X86_64_IRELATIVE;
+}
+
+static bool get_ifunc_name(Elf *elf, struct dso *dso, GElf_Ehdr *ehdr,
+ struct rel_info *ri, char *buf, size_t buf_sz)
+{
+ u64 addr = ri->rela.r_addend;
+ struct symbol *sym;
+ GElf_Phdr phdr;
+
+ if (!addend_may_be_ifunc(ehdr, ri))
+ return false;
+
+ if (elf_read_program_header(elf, addr, &phdr))
+ return false;
+
+ addr -= phdr.p_vaddr - phdr.p_offset;
+
+ sym = dso__find_symbol_nocache(dso, addr);
+
+ /* Expecting the address to be an IFUNC or IFUNC alias */
+ if (!sym || sym->start != addr || (sym->type != STT_GNU_IFUNC && !sym->ifunc_alias))
+ return false;
+
+ snprintf(buf, buf_sz, "%s@plt", sym->name);
+
+ return true;
+}
+
+static void exit_rel(struct rel_info *ri)
+{
+ free(ri->sorted);
+}
+
+static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt,
+ u64 *plt_header_size, u64 *plt_entry_size)
+{
+ switch (ehdr->e_machine) {
+ case EM_ARM:
+ *plt_header_size = 20;
+ *plt_entry_size = 12;
+ return true;
+ case EM_AARCH64:
+ *plt_header_size = 32;
+ *plt_entry_size = 16;
+ return true;
+ case EM_SPARC:
+ *plt_header_size = 48;
+ *plt_entry_size = 12;
+ return true;
+ case EM_SPARCV9:
+ *plt_header_size = 128;
+ *plt_entry_size = 32;
+ return true;
+ case EM_386:
+ case EM_X86_64:
+ *plt_entry_size = shdr_plt->sh_entsize;
+ /* Size is 8 or 16, if not, assume alignment indicates size */
+ if (*plt_entry_size != 8 && *plt_entry_size != 16)
+ *plt_entry_size = shdr_plt->sh_addralign == 8 ? 8 : 16;
+ *plt_header_size = *plt_entry_size;
+ break;
+ default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
+ *plt_header_size = shdr_plt->sh_entsize;
+ *plt_entry_size = shdr_plt->sh_entsize;
+ break;
+ }
+ if (*plt_entry_size)
+ return true;
+ pr_debug("Missing PLT entry size for %s\n", dso->long_name);
+ return false;
+}
+
+static bool machine_is_x86(GElf_Half e_machine)
+{
+ return e_machine == EM_386 || e_machine == EM_X86_64;
+}
+
+struct rela_dyn {
+ GElf_Addr offset;
+ u32 sym_idx;
+};
+
+struct rela_dyn_info {
+ struct dso *dso;
+ Elf_Data *plt_got_data;
+ u32 nr_entries;
+ struct rela_dyn *sorted;
+ Elf_Data *dynsym_data;
+ Elf_Data *dynstr_data;
+ Elf_Data *rela_dyn_data;
+};
+
+static void exit_rela_dyn(struct rela_dyn_info *di)
+{
+ free(di->sorted);
+}
+
+static int cmp_offset(const void *a, const void *b)
+{
+ const struct rela_dyn *va = a;
+ const struct rela_dyn *vb = b;
+
+ return va->offset < vb->offset ? -1 : (va->offset > vb->offset ? 1 : 0);
+}
+
+static int sort_rela_dyn(struct rela_dyn_info *di)
+{
+ u32 i, n;
+
+ di->sorted = calloc(di->nr_entries, sizeof(di->sorted[0]));
+ if (!di->sorted)
+ return -1;
+
+ /* Get data for sorting: the offset and symbol index */
+ for (i = 0, n = 0; i < di->nr_entries; i++) {
+ GElf_Rela rela;
+ u32 sym_idx;
+
+ gelf_getrela(di->rela_dyn_data, i, &rela);
+ sym_idx = GELF_R_SYM(rela.r_info);
+ if (sym_idx) {
+ di->sorted[n].sym_idx = sym_idx;
+ di->sorted[n].offset = rela.r_offset;
+ n += 1;
+ }
+ }
+
+ /* Sort by offset */
+ di->nr_entries = n;
+ qsort(di->sorted, n, sizeof(di->sorted[0]), cmp_offset);
+
+ return 0;
+}
+
+static void get_rela_dyn_info(Elf *elf, GElf_Ehdr *ehdr, struct rela_dyn_info *di, Elf_Scn *scn)
+{
+ GElf_Shdr rela_dyn_shdr;
+ GElf_Shdr shdr;
+
+ di->plt_got_data = elf_getdata(scn, NULL);
+
+ scn = elf_section_by_name(elf, ehdr, &rela_dyn_shdr, ".rela.dyn", NULL);
+ if (!scn || !rela_dyn_shdr.sh_link || !rela_dyn_shdr.sh_entsize)
+ return;
+
+ di->nr_entries = rela_dyn_shdr.sh_size / rela_dyn_shdr.sh_entsize;
+ di->rela_dyn_data = elf_getdata(scn, NULL);
+
+ scn = elf_getscn(elf, rela_dyn_shdr.sh_link);
+ if (!scn || !gelf_getshdr(scn, &shdr) || !shdr.sh_link)
+ return;
+
+ di->dynsym_data = elf_getdata(scn, NULL);
+ di->dynstr_data = elf_getdata(elf_getscn(elf, shdr.sh_link), NULL);
+
+ if (!di->plt_got_data || !di->dynstr_data || !di->dynsym_data || !di->rela_dyn_data)
+ return;
+
+ /* Sort into offset order */
+ sort_rela_dyn(di);
+}
+
+/* Get instruction displacement from a plt entry for x86_64 */
+static u32 get_x86_64_plt_disp(const u8 *p)
+{
+ u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa};
+ int n = 0;
+
+ /* Skip endbr64 */
+ if (!memcmp(p, endbr64, sizeof(endbr64)))
+ n += sizeof(endbr64);
+ /* Skip bnd prefix */
+ if (p[n] == 0xf2)
+ n += 1;
+ /* jmp with 4-byte displacement */
+ if (p[n] == 0xff && p[n + 1] == 0x25) {
+ n += 2;
+ /* Also add offset from start of entry to end of instruction */
+ return n + 4 + le32toh(*(const u32 *)(p + n));
+ }
+ return 0;
+}
+
+static bool get_plt_got_name(GElf_Shdr *shdr, size_t i,
+ struct rela_dyn_info *di,
+ char *buf, size_t buf_sz)
+{
+ struct rela_dyn vi, *vr;
+ const char *sym_name;
+ char *demangled;
+ GElf_Sym sym;
+ u32 disp;
+
+ if (!di->sorted)
+ return false;
+
+ disp = get_x86_64_plt_disp(di->plt_got_data->d_buf + i);
+ if (!disp)
+ return false;
+
+ /* Compute target offset of the .plt.got entry */
+ vi.offset = shdr->sh_offset + di->plt_got_data->d_off + i + disp;
+
+ /* Find that offset in .rela.dyn (sorted by offset) */
+ vr = bsearch(&vi, di->sorted, di->nr_entries, sizeof(di->sorted[0]), cmp_offset);
+ if (!vr)
+ return false;
+
+ /* Get the associated symbol */
+ gelf_getsym(di->dynsym_data, vr->sym_idx, &sym);
+ sym_name = elf_sym__name(&sym, di->dynstr_data);
+ demangled = demangle_sym(di->dso, 0, sym_name);
+ if (demangled != NULL)
+ sym_name = demangled;
+
+ snprintf(buf, buf_sz, "%s@plt", sym_name);
+
+ free(demangled);
+
+ return *sym_name;
+}
+
+static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf,
+ GElf_Ehdr *ehdr,
+ char *buf, size_t buf_sz)
+{
+ struct rela_dyn_info di = { .dso = dso };
+ struct symbol *sym;
+ GElf_Shdr shdr;
+ Elf_Scn *scn;
+ int err = -1;
+ size_t i;
+
+ scn = elf_section_by_name(elf, ehdr, &shdr, ".plt.got", NULL);
+ if (!scn || !shdr.sh_entsize)
+ return 0;
+
+ if (ehdr->e_machine == EM_X86_64)
+ get_rela_dyn_info(elf, ehdr, &di, scn);
+
+ for (i = 0; i < shdr.sh_size; i += shdr.sh_entsize) {
+ if (!get_plt_got_name(&shdr, i, &di, buf, buf_sz))
+ snprintf(buf, buf_sz, "offset_%#" PRIx64 "@plt", (u64)shdr.sh_offset + i);
+ sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf);
+ if (!sym)
+ goto out;
+ symbols__insert(&dso->symbols, sym);
+ }
+ err = 0;
+out:
+ exit_rela_dyn(&di);
+ return err;
+}
/*
* We need to check if we have a .dynsym, so that we can handle the
@@ -342,56 +652,104 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
*/
int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
{
- uint32_t nr_rel_entries, idx;
+ uint32_t idx;
GElf_Sym sym;
u64 plt_offset, plt_header_size, plt_entry_size;
- GElf_Shdr shdr_plt;
- struct symbol *f;
+ GElf_Shdr shdr_plt, plt_sec_shdr;
+ struct symbol *f, *plt_sym;
GElf_Shdr shdr_rel_plt, shdr_dynsym;
- Elf_Data *reldata, *syms, *symstrs;
+ Elf_Data *syms, *symstrs;
Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
- size_t dynsym_idx;
GElf_Ehdr ehdr;
char sympltname[1024];
Elf *elf;
- int nr = 0, symidx, err = 0;
-
- if (!ss->dynsym)
- return 0;
+ int nr = 0, err = -1;
+ struct rel_info ri = { .is_rela = false };
+ bool lazy_plt;
elf = ss->elf;
ehdr = ss->ehdr;
- scn_dynsym = ss->dynsym;
- shdr_dynsym = ss->dynshdr;
- dynsym_idx = ss->dynsym_idx;
+ if (!elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL))
+ return 0;
+
+ /*
+ * A symbol from a previous section (e.g. .init) can have been expanded
+ * by symbols__fixup_end() to overlap .plt. Truncate it before adding
+ * a symbol for .plt header.
+ */
+ f = dso__find_symbol_nocache(dso, shdr_plt.sh_offset);
+ if (f && f->start < shdr_plt.sh_offset && f->end > shdr_plt.sh_offset)
+ f->end = shdr_plt.sh_offset;
+
+ if (!get_plt_sizes(dso, &ehdr, &shdr_plt, &plt_header_size, &plt_entry_size))
+ return 0;
+
+ /* Add a symbol for .plt header */
+ plt_sym = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt");
+ if (!plt_sym)
+ goto out_elf_end;
+ symbols__insert(&dso->symbols, plt_sym);
- if (scn_dynsym == NULL)
+ /* Only x86 has .plt.got */
+ if (machine_is_x86(ehdr.e_machine) &&
+ dso__synthesize_plt_got_symbols(dso, elf, &ehdr, sympltname, sizeof(sympltname)))
goto out_elf_end;
+ /* Only x86 has .plt.sec */
+ if (machine_is_x86(ehdr.e_machine) &&
+ elf_section_by_name(elf, &ehdr, &plt_sec_shdr, ".plt.sec", NULL)) {
+ if (!get_plt_sizes(dso, &ehdr, &plt_sec_shdr, &plt_header_size, &plt_entry_size))
+ return 0;
+ /* Extend .plt symbol to entire .plt */
+ plt_sym->end = plt_sym->start + shdr_plt.sh_size;
+ /* Use .plt.sec offset */
+ plt_offset = plt_sec_shdr.sh_offset;
+ lazy_plt = false;
+ } else {
+ plt_offset = shdr_plt.sh_offset;
+ lazy_plt = true;
+ }
+
scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
".rela.plt", NULL);
if (scn_plt_rel == NULL) {
scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
".rel.plt", NULL);
if (scn_plt_rel == NULL)
- goto out_elf_end;
+ return 0;
}
- err = -1;
+ if (shdr_rel_plt.sh_type != SHT_RELA &&
+ shdr_rel_plt.sh_type != SHT_REL)
+ return 0;
- if (shdr_rel_plt.sh_link != dynsym_idx)
- goto out_elf_end;
+ if (!shdr_rel_plt.sh_link)
+ return 0;
- if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL)
+ if (shdr_rel_plt.sh_link == ss->dynsym_idx) {
+ scn_dynsym = ss->dynsym;
+ shdr_dynsym = ss->dynshdr;
+ } else if (shdr_rel_plt.sh_link == ss->symtab_idx) {
+ /*
+ * A static executable can have a .plt due to IFUNCs, in which
+ * case .symtab is used not .dynsym.
+ */
+ scn_dynsym = ss->symtab;
+ shdr_dynsym = ss->symshdr;
+ } else {
goto out_elf_end;
+ }
+
+ if (!scn_dynsym)
+ return 0;
/*
* Fetch the relocation section to find the idxes to the GOT
* and the symbols in the .dynsym they refer to.
*/
- reldata = elf_getdata(scn_plt_rel, NULL);
- if (reldata == NULL)
+ ri.reldata = elf_getdata(scn_plt_rel, NULL);
+ if (!ri.reldata)
goto out_elf_end;
syms = elf_getdata(scn_dynsym, NULL);
@@ -409,93 +767,57 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
if (symstrs->d_size == 0)
goto out_elf_end;
- nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
- plt_offset = shdr_plt.sh_offset;
- switch (ehdr.e_machine) {
- case EM_ARM:
- plt_header_size = 20;
- plt_entry_size = 12;
- break;
+ ri.nr_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
- case EM_AARCH64:
- plt_header_size = 32;
- plt_entry_size = 16;
- break;
+ ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA;
- case EM_SPARC:
- plt_header_size = 48;
- plt_entry_size = 12;
- break;
-
- case EM_SPARCV9:
- plt_header_size = 128;
- plt_entry_size = 32;
- break;
-
- default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
- plt_header_size = shdr_plt.sh_entsize;
- plt_entry_size = shdr_plt.sh_entsize;
- break;
+ if (lazy_plt) {
+ /*
+ * Assume a .plt with the same number of entries as the number
+ * of relocation entries is not lazy and does not have a header.
+ */
+ if (ri.nr_entries * plt_entry_size == shdr_plt.sh_size)
+ dso__delete_symbol(dso, plt_sym);
+ else
+ plt_offset += plt_header_size;
}
- plt_offset += plt_header_size;
-
- if (shdr_rel_plt.sh_type == SHT_RELA) {
- GElf_Rela pos_mem, *pos;
- elf_section__for_each_rela(reldata, pos, pos_mem, idx,
- nr_rel_entries) {
- const char *elf_name = NULL;
- char *demangled = NULL;
- symidx = GELF_R_SYM(pos->r_info);
- gelf_getsym(syms, symidx, &sym);
+ /*
+ * x86 doesn't insert IFUNC relocations in .plt order, so sort to get
+ * back in order.
+ */
+ if (machine_is_x86(ehdr.e_machine) && sort_rel(&ri))
+ goto out_elf_end;
- elf_name = elf_sym__name(&sym, symstrs);
- demangled = demangle_sym(dso, 0, elf_name);
- if (demangled != NULL)
- elf_name = demangled;
- snprintf(sympltname, sizeof(sympltname),
- "%s@plt", elf_name);
- free(demangled);
+ for (idx = 0; idx < ri.nr_entries; idx++) {
+ const char *elf_name = NULL;
+ char *demangled = NULL;
- f = symbol__new(plt_offset, plt_entry_size,
- STB_GLOBAL, STT_FUNC, sympltname);
- if (!f)
- goto out_elf_end;
+ gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym);
- plt_offset += plt_entry_size;
- symbols__insert(&dso->symbols, f);
- ++nr;
- }
- } else if (shdr_rel_plt.sh_type == SHT_REL) {
- GElf_Rel pos_mem, *pos;
- elf_section__for_each_rel(reldata, pos, pos_mem, idx,
- nr_rel_entries) {
- const char *elf_name = NULL;
- char *demangled = NULL;
- symidx = GELF_R_SYM(pos->r_info);
- gelf_getsym(syms, symidx, &sym);
-
- elf_name = elf_sym__name(&sym, symstrs);
- demangled = demangle_sym(dso, 0, elf_name);
- if (demangled != NULL)
- elf_name = demangled;
+ elf_name = elf_sym__name(&sym, symstrs);
+ demangled = demangle_sym(dso, 0, elf_name);
+ if (demangled)
+ elf_name = demangled;
+ if (*elf_name)
+ snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name);
+ else if (!get_ifunc_name(elf, dso, &ehdr, &ri, sympltname, sizeof(sympltname)))
snprintf(sympltname, sizeof(sympltname),
- "%s@plt", elf_name);
- free(demangled);
+ "offset_%#" PRIx64 "@plt", plt_offset);
+ free(demangled);
- f = symbol__new(plt_offset, plt_entry_size,
- STB_GLOBAL, STT_FUNC, sympltname);
- if (!f)
- goto out_elf_end;
+ f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, STT_FUNC, sympltname);
+ if (!f)
+ goto out_elf_end;
- plt_offset += plt_entry_size;
- symbols__insert(&dso->symbols, f);
- ++nr;
- }
+ plt_offset += plt_entry_size;
+ symbols__insert(&dso->symbols, f);
+ ++nr;
}
err = 0;
out_elf_end:
+ exit_rel(&ri);
if (err == 0)
return nr;
pr_debug("%s: problems reading %s PLT info.\n",
@@ -946,8 +1268,9 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+ ss->symtab_idx = 0;
ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab",
- NULL);
+ &ss->symtab_idx);
if (ss->symshdr.sh_type != SHT_SYMTAB)
ss->symtab = NULL;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a3a165ae933a..317c0706852f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -201,10 +201,14 @@ again:
continue;
if (choose_best_symbol(curr, next) == SYMBOL_A) {
+ if (next->type == STT_GNU_IFUNC)
+ curr->ifunc_alias = true;
rb_erase_cached(&next->rb_node, symbols);
symbol__delete(next);
goto again;
} else {
+ if (curr->type == STT_GNU_IFUNC)
+ next->ifunc_alias = true;
nd = rb_next(&curr->rb_node);
rb_erase_cached(&curr->rb_node, symbols);
symbol__delete(curr);
@@ -556,6 +560,11 @@ struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
return dso->last_find_result.symbol;
}
+struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr)
+{
+ return symbols__find(&dso->symbols, addr);
+}
+
struct symbol *dso__first_symbol(struct dso *dso)
{
return symbols__first(&dso->symbols);
@@ -1368,10 +1377,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
/* Find the kernel map using the '_stext' symbol */
if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+ u64 replacement_size = 0;
+
list_for_each_entry(new_map, &md.maps, node) {
- if (stext >= new_map->start && stext < new_map->end) {
+ u64 new_size = new_map->end - new_map->start;
+
+ if (!(stext >= new_map->start && stext < new_map->end))
+ continue;
+
+ /*
+ * On some architectures, ARM64 for example, the kernel
+ * text can get allocated inside of the vmalloc segment.
+ * Select the smallest matching segment, in case stext
+ * falls within more than one in the list.
+ */
+ if (!replacement_map || new_size < replacement_size) {
replacement_map = new_map;
- break;
+ replacement_size = new_size;
}
}
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f735108c4d4e..7558735543c2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -64,6 +64,8 @@ struct symbol {
u8 inlined:1;
/** Has symbol__annotate2 been performed. */
u8 annotate2:1;
+ /** Symbol is an alias of an STT_GNU_IFUNC */
+ u8 ifunc_alias:1;
/** Architecture specific. Unused except on PPC where it holds st_other. */
u8 arch_sym;
/** The name of length namelen associated with the symbol. */
@@ -148,6 +150,7 @@ void dso__delete_symbol(struct dso *dso,
struct symbol *sym);
struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);
struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
struct symbol *symbol__next_by_name(struct symbol *sym);
diff --git a/tools/perf/util/symsrc.h b/tools/perf/util/symsrc.h
index 2665b4bde751..edf82028c9e6 100644
--- a/tools/perf/util/symsrc.h
+++ b/tools/perf/util/symsrc.h
@@ -26,6 +26,7 @@ struct symsrc {
GElf_Shdr opdshdr;
Elf_Scn *symtab;
+ size_t symtab_idx;
GElf_Shdr symshdr;
Elf_Scn *dynsym;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 3ab6a92b1a6d..9ab9308ee80c 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -2219,8 +2219,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16
len = pos->long_name_len + 1;
len = PERF_ALIGN(len, NAME_ALIGN);
- memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data));
- ev.build_id.size = pos->bid.size;
+ ev.build_id.size = min(pos->bid.size, sizeof(pos->bid.data));
+ memcpy(&ev.build_id.build_id, pos->bid.data, ev.build_id.size);
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
ev.build_id.pid = machine->pid;
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 9b3cd79cca12..a0cff184b1cd 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -21,6 +21,9 @@ struct trace_event {
struct tep_plugin_list *plugin_list;
};
+/* Computes a version number comparable with LIBTRACEEVENT_VERSION from Makefile.config. */
+#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c))
+
typedef char *(tep_func_resolver_t)(void *priv,
unsigned long long *addrp, char **modp);
@@ -137,4 +140,20 @@ int common_lock_depth(struct scripting_context *context);
#define SAMPLE_FLAGS_BUF_SIZE 64
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
+#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
+#include <traceevent/event-parse.h>
+
+static inline bool tep_field_is_relative(unsigned long flags)
+{
+ return (flags & TEP_FIELD_IS_RELATIVE) != 0;
+}
+#else
+#include <linux/compiler.h>
+
+static inline bool tep_field_is_relative(unsigned long flags __maybe_unused)
+{
+ return false;
+}
+#endif
+
#endif /* _PERF_UTIL_TRACE_EVENT_H */