Diffstat (limited to 'tools/perf/util')
-rw-r--r-- tools/perf/util/Build | 32
-rw-r--r-- tools/perf/util/addr_location.c | 1
-rw-r--r-- tools/perf/util/addr_location.h | 6
-rw-r--r-- tools/perf/util/annotate-data.c | 49
-rw-r--r-- tools/perf/util/annotate-data.h | 13
-rw-r--r-- tools/perf/util/annotate.c | 267
-rw-r--r-- tools/perf/util/annotate.h | 9
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 23
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 11
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 14
-rw-r--r-- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 12
-rw-r--r-- tools/perf/util/arm-spe.c | 167
-rw-r--r-- tools/perf/util/arm64-frame-pointer-unwind-support.c | 29
-rw-r--r-- tools/perf/util/auxtrace.c | 15
-rw-r--r-- tools/perf/util/bpf-filter.l | 2
-rw-r--r-- tools/perf/util/bpf_ftrace.c | 14
-rw-r--r-- tools/perf/util/bpf_lock_contention.c | 85
-rw-r--r-- tools/perf/util/bpf_skel/func_latency.bpf.c | 28
-rw-r--r-- tools/perf/util/bpf_skel/kwork_trace.bpf.c | 2
-rw-r--r-- tools/perf/util/bpf_skel/lock_contention.bpf.c | 245
-rw-r--r-- tools/perf/util/bpf_skel/lock_data.h | 7
-rw-r--r-- tools/perf/util/branch.h | 3
-rw-r--r-- tools/perf/util/callchain.c | 10
-rw-r--r-- tools/perf/util/color.h | 5
-rw-r--r-- tools/perf/util/color_config.c | 11
-rw-r--r-- tools/perf/util/comm.c | 2
-rw-r--r-- tools/perf/util/compress.h | 20
-rw-r--r-- tools/perf/util/config.c | 6
-rw-r--r-- tools/perf/util/config.h | 1
-rw-r--r-- tools/perf/util/cpumap.c | 70
-rw-r--r-- tools/perf/util/cs-etm.c | 31
-rw-r--r-- tools/perf/util/data.c | 20
-rw-r--r-- tools/perf/util/data.h | 1
-rw-r--r-- tools/perf/util/debug.c | 2
-rw-r--r-- tools/perf/util/debuginfo.c | 6
-rw-r--r-- tools/perf/util/disasm.c | 27
-rw-r--r-- tools/perf/util/dso.c | 166
-rw-r--r-- tools/perf/util/dso.h | 82
-rw-r--r-- tools/perf/util/env.c | 2
-rw-r--r-- tools/perf/util/event.c | 11
-rw-r--r-- tools/perf/util/event.h | 12
-rw-r--r-- tools/perf/util/events_stats.h | 2
-rw-r--r-- tools/perf/util/evlist.c | 32
-rw-r--r-- tools/perf/util/evlist.h | 1
-rw-r--r-- tools/perf/util/evsel.c | 51
-rw-r--r-- tools/perf/util/evsel.h | 2
-rw-r--r-- tools/perf/util/expr.c | 2
-rw-r--r-- tools/perf/util/ftrace.h | 2
-rw-r--r-- tools/perf/util/header.c | 33
-rw-r--r-- tools/perf/util/hist.c | 108
-rw-r--r-- tools/perf/util/hist.h | 32
-rw-r--r-- tools/perf/util/hwmon_pmu.c | 56
-rw-r--r-- tools/perf/util/hwmon_pmu.h | 16
-rw-r--r-- tools/perf/util/intel-bts.c | 4
-rw-r--r-- tools/perf/util/intel-pt.c | 136
-rw-r--r-- tools/perf/util/intel-tpebs.c | 2
-rw-r--r-- tools/perf/util/jitdump.c | 10
-rw-r--r-- tools/perf/util/lock-contention.h | 7
-rw-r--r-- tools/perf/util/lzma.c | 29
-rw-r--r-- tools/perf/util/machine.c | 92
-rw-r--r-- tools/perf/util/machine.h | 8
-rw-r--r-- tools/perf/util/maps.c | 58
-rw-r--r-- tools/perf/util/mem-events.c | 74
-rw-r--r-- tools/perf/util/mem-events.h | 3
-rw-r--r-- tools/perf/util/mmap.c | 15
-rw-r--r-- tools/perf/util/mmap.h | 3
-rw-r--r-- tools/perf/util/mutex.h | 8
-rw-r--r-- tools/perf/util/parse-events.c | 179
-rw-r--r-- tools/perf/util/parse-events.l | 51
-rw-r--r-- tools/perf/util/perf_event_attr_fprintf.c | 124
-rw-r--r-- tools/perf/util/pmu.c | 315
-rw-r--r-- tools/perf/util/pmu.h | 12
-rw-r--r-- tools/perf/util/pmus.c | 202
-rw-r--r-- tools/perf/util/pmus.h | 1
-rw-r--r-- tools/perf/util/probe-finder.c | 21
-rw-r--r-- tools/perf/util/probe-finder.h | 1
-rw-r--r-- tools/perf/util/pstack.c | 14
-rw-r--r-- tools/perf/util/pstack.h | 1
-rw-r--r-- tools/perf/util/python.c | 160
-rw-r--r-- tools/perf/util/rb_resort.h | 146
-rw-r--r-- tools/perf/util/s390-cpumsf.c | 6
-rw-r--r-- tools/perf/util/sample.c | 43
-rw-r--r-- tools/perf/util/sample.h | 11
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-python.c | 29
-rw-r--r-- tools/perf/util/session.c | 106
-rw-r--r-- tools/perf/util/session.h | 1
-rw-r--r-- tools/perf/util/setup.py | 10
-rw-r--r-- tools/perf/util/sort.c | 152
-rw-r--r-- tools/perf/util/sort.h | 3
-rw-r--r-- tools/perf/util/stat-display.c | 111
-rw-r--r-- tools/perf/util/stat-shadow.c | 3
-rw-r--r-- tools/perf/util/stat.c | 13
-rw-r--r-- tools/perf/util/symbol-elf.c | 133
-rw-r--r-- tools/perf/util/symbol.c | 36
-rw-r--r-- tools/perf/util/symbol_conf.h | 8
-rw-r--r-- tools/perf/util/synthetic-events.c | 46
-rw-r--r-- tools/perf/util/syscalltbl.c | 148
-rw-r--r-- tools/perf/util/syscalltbl.h | 22
-rw-r--r-- tools/perf/util/thread.c | 80
-rw-r--r-- tools/perf/util/thread.h | 14
-rw-r--r-- tools/perf/util/tool_pmu.c | 35
-rw-r--r-- tools/perf/util/tool_pmu.h | 2
-rw-r--r-- tools/perf/util/trace-event-scripting.c | 116
-rw-r--r-- tools/perf/util/trace-event.h | 2
-rw-r--r-- tools/perf/util/units.c | 2
-rw-r--r-- tools/perf/util/unwind-libdw.c | 9
-rw-r--r-- tools/perf/util/unwind-libunwind-local.c | 28
107 files changed, 3281 insertions, 1420 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5ec97e8d6b6d..946bce6628f3 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -67,6 +67,7 @@ perf-util-y += maps.o
perf-util-y += pstack.o
perf-util-y += session.o
perf-util-y += tool.o
+perf-util-y += sample.o
perf-util-y += sample-raw.o
perf-util-y += s390-sample-raw.o
perf-util-y += amd-sample-raw.o
@@ -405,14 +406,39 @@ $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
ifdef SHELLCHECK
SHELL_TESTS := generate-cmdlist.sh
- TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+ SHELL_TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
else
SHELL_TESTS :=
- TEST_LOGS :=
+ SHELL_TEST_LOGS :=
endif
$(OUTPUT)%.shellcheck_log: %
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
-perf-util-y += $(TEST_LOGS)
+perf-util-y += $(SHELL_TEST_LOGS)
+
+PY_TESTS := setup.py
+ifdef MYPY
+ MYPY_TEST_LOGS := $(PY_TESTS:%=%.mypy_log)
+else
+ MYPY_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.mypy_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)mypy "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(MYPY_TEST_LOGS)
+
+ifdef PYLINT
+ PYLINT_TEST_LOGS := $(PY_TESTS:%=%.pylint_log)
+else
+ PYLINT_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.pylint_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(PYLINT_TEST_LOGS)
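
The two new rule sets mirror the existing SHELLCHECK hook above: each Python checker is opt-in via a make variable, and the per-file .mypy_log/.pylint_log target fails the build and dumps the log when the checker complains. Presumably they are enabled the same way SHELLCHECK is, by setting the variable on the make command line (hypothetical invocation, not part of the patch):

make -C tools/perf MYPY=1 PYLINT=1
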
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
index 51825ef8c0ab..007a2f5df9a6 100644
--- a/tools/perf/util/addr_location.c
+++ b/tools/perf/util/addr_location.c
@@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
al->cpumode = 0;
al->cpu = 0;
al->socket = 0;
+ al->parallelism = 1;
}
/*
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
index d8ac0428dff2..64b551025216 100644
--- a/tools/perf/util/addr_location.h
+++ b/tools/perf/util/addr_location.h
@@ -17,10 +17,14 @@ struct addr_location {
const char *srcline;
u64 addr;
char level;
- u8 filtered;
u8 cpumode;
+ u16 filtered;
s32 cpu;
s32 socket;
+ /* Same as machine.parallelism but within [1, nr_cpus]. */
+ int parallelism;
+ /* See he_stat.latency. */
+ u64 latency;
};
void addr_location__init(struct addr_location *al);
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 976abedca09e..1ef2edbc71d9 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -314,6 +314,40 @@ static void delete_members(struct annotated_member *member)
}
}
+static int fill_member_name(char *buf, size_t sz, struct annotated_member *m,
+ int offset, bool first)
+{
+ struct annotated_member *child;
+
+ if (list_empty(&m->children))
+ return 0;
+
+ list_for_each_entry(child, &m->children, node) {
+ int len;
+
+ if (offset < child->offset || offset >= child->offset + child->size)
+ continue;
+
+ /* It can have anonymous struct/union members */
+ if (child->var_name) {
+ len = scnprintf(buf, sz, "%s%s",
+ first ? "" : ".", child->var_name);
+ first = false;
+ } else {
+ len = 0;
+ }
+
+ return fill_member_name(buf + len, sz - len, child, offset, first) + len;
+ }
+ return 0;
+}
+
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+ char *buf, size_t sz, int member_offset)
+{
+ return fill_member_name(buf, sz, &adt->self, member_offset, /*first=*/true);
+}
+
static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
Dwarf_Die *type_die)
{
@@ -830,7 +864,7 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
continue;
- if (var->reg == DWARF_REG_FB || var->reg == fbreg) {
+ if (var->reg == DWARF_REG_FB || var->reg == fbreg || var->reg == state->stack_reg) {
int offset = var->offset;
struct type_state_stack *stack;
@@ -845,8 +879,13 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
findnew_stack_state(state, offset, TSR_KIND_TYPE,
&mem_die);
- pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -offset);
+ if (var->reg == state->stack_reg) {
+ pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)",
+ insn_offset, offset, state->stack_reg);
+ } else {
+ pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
+ insn_offset, -offset);
+ }
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
} else if (has_reg_type(state, var->reg) && var->offset == 0) {
struct type_state_reg *reg;
@@ -1127,10 +1166,10 @@ again:
}
check_non_register:
- if (reg == dloc->fbreg) {
+ if (reg == dloc->fbreg || reg == state->stack_reg) {
struct type_state_stack *stack;
- pr_debug_dtp("fbreg");
+ pr_debug_dtp("%s", reg == dloc->fbreg ? "fbreg" : "stack");
stack = find_stack_state(state, dloc->type_offset);
if (stack == NULL) {
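
The new fill_member_name() recursion descends into whichever child member contains the requested offset, joining names with '.' and skipping anonymous struct/union levels, which contribute no name but are still descended into. A standalone toy re-statement of the recursion (hypothetical layout; the member struct below is not the perf data structure):

#include <stdio.h>
#include <string.h>

struct member {
	const char *name;		/* NULL models an anonymous struct/union */
	int offset, size;
	const struct member *children;
	int nr_children;
};

static int fill_name(char *buf, size_t sz, const struct member *m,
		     int offset, int first)
{
	for (int i = 0; i < m->nr_children; i++) {
		const struct member *c = &m->children[i];
		int len = 0;

		if (offset < c->offset || offset >= c->offset + c->size)
			continue;
		if (c->name) {		/* anonymous members contribute no name */
			len = snprintf(buf, sz, "%s%s", first ? "" : ".", c->name);
			first = 0;
		}
		return fill_name(buf + len, sz - len, c, offset, first) + len;
	}
	return 0;
}

int main(void)
{
	const struct member anon_kids[] = {
		{ "x", 8, 4, NULL, 0 }, { "y", 12, 4, NULL, 0 },
	};
	const struct member kids[] = {
		{ "a", 0, 8, NULL, 0 },
		{ NULL, 8, 8, anon_kids, 2 },	/* anonymous struct */
	};
	const struct member root = { "foo", 0, 16, kids, 2 };
	char buf[64];

	fill_name(buf, sizeof(buf), &root, 12, 1);
	printf("%s\n", buf);	/* prints "y" */
	return 0;
}
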
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 98c80b2268dd..541fee1a5f0a 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -227,8 +227,13 @@ void annotated_data_type__tree_delete(struct rb_root *root);
/* Release all global variable information in the tree */
void global_var_type__tree_delete(struct rb_root *root);
+/* Print data type annotation (including members) on stdout */
int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel);
+/* Get name of member field at the given offset in the data type */
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+ char *buf, size_t sz, int member_offset);
+
bool has_reg_type(struct type_state *state, int reg);
struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
@@ -276,6 +281,14 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un
return -1;
}
+static inline int annotated_data_type__get_member_name(struct annotated_data_type *adt __maybe_unused,
+ char *buf __maybe_unused,
+ size_t sz __maybe_unused,
+ int member_offset __maybe_unused)
+{
+ return -1;
+}
+
#endif /* HAVE_LIBDW_SUPPORT */
#ifdef HAVE_SLANG_SUPPORT
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 31bb326b07a6..1e59b9e5339d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -87,6 +87,8 @@ struct annotated_data_type canary_type = {
},
};
+#define NO_TYPE ((struct annotated_data_type *)-1UL)
+
/* symbol histogram: key = offset << 16 | evsel->core.idx */
static size_t sym_hist_hash(long key, void *ctx __maybe_unused)
{
@@ -758,15 +760,31 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi
return 0;
}
+static struct annotated_data_type *
+__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
+ struct debuginfo *dbg, struct disasm_line *dl,
+ int *type_offset);
+
+struct annotation_print_data {
+ struct hist_entry *he;
+ struct evsel *evsel;
+ struct arch *arch;
+ struct debuginfo *dbg;
+ u64 start;
+ int addr_fmt_width;
+};
+
static int
-annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
- struct evsel *evsel, u64 len, int min_pcnt, int printed,
- int max_lines, struct annotation_line *queue, int addr_fmt_width,
- int percent_type)
+annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd,
+ struct annotation_options *opts, int printed,
+ struct annotation_line *queue)
{
+ struct symbol *sym = apd->he->ms.sym;
struct disasm_line *dl = container_of(al, struct disasm_line, al);
struct annotation *notes = symbol__annotation(sym);
static const char *prev_line;
+ int max_lines = opts->max_lines;
+ int percent_type = opts->percent_type;
if (al->offset != -1) {
double max_percent = 0.0;
@@ -786,19 +804,23 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (al->data_nr > nr_percent)
nr_percent = al->data_nr;
- if (max_percent < min_pcnt)
+ if (max_percent < opts->min_pcnt)
return -1;
if (max_lines && printed >= max_lines)
return 1;
if (queue != NULL) {
+ struct annotation_options queue_opts = {
+ .max_lines = 1,
+ .percent_type = percent_type,
+ };
+
list_for_each_entry_from(queue, &notes->src->source, node) {
if (queue == al)
break;
- annotation_line__print(queue, sym, start, evsel, len,
- 0, 0, 1, NULL, addr_fmt_width,
- percent_type);
+ annotation_line__print(queue, apd, &queue_opts,
+ /*printed=*/0, /*queue=*/NULL);
}
}
@@ -823,7 +845,31 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
printf(" : ");
- disasm_line__print(dl, start, addr_fmt_width);
+ disasm_line__print(dl, apd->start, apd->addr_fmt_width);
+
+ if (opts->code_with_type && apd->dbg) {
+ struct annotated_data_type *data_type;
+ int offset = 0;
+
+ data_type = __hist_entry__get_data_type(apd->he, apd->arch,
+ apd->dbg, dl, &offset);
+ if (data_type && data_type != NO_TYPE) {
+ char buf[4096];
+
+ printf("\t\t# data-type: %s",
+ data_type->self.type_name);
+
+ if (data_type != &stackop_type &&
+ data_type != &canary_type)
+ printf(" +%#x", offset);
+
+ if (annotated_data_type__get_member_name(data_type,
+ buf,
+ sizeof(buf),
+ offset))
+ printf(" (%s)", buf);
+ }
+ }
/*
* Also color the filename and line if needed, with
@@ -849,7 +895,8 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (!*al->line)
printf(" %*s:\n", width, " ");
else
- printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line);
+ printf(" %*s: %-*d %s\n", width, " ", apd->addr_fmt_width,
+ al->line_nr, al->line);
}
return 0;
@@ -1167,8 +1214,9 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
return 0;
}
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct map *map = ms->map;
struct symbol *sym = ms->sym;
struct dso *dso = map__dso(map);
@@ -1179,11 +1227,14 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
struct sym_hist *h = annotation__histogram(notes, evsel);
struct annotation_line *pos, *queue = NULL;
struct annotation_options *opts = &annotate_opts;
- u64 start = map__rip_2objdump(map, sym->start);
- int printed = 2, queue_len = 0, addr_fmt_width;
+ struct annotation_print_data apd = {
+ .he = he,
+ .evsel = evsel,
+ .start = map__rip_2objdump(map, sym->start),
+ };
+ int printed = 2, queue_len = 0;
int more = 0;
bool context = opts->context;
- u64 len;
int width = annotation__pcnt_width(notes);
int graph_dotted_len;
char buf[512];
@@ -1197,8 +1248,6 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
else
d_filename = basename(filename);
- len = symbol__size(sym);
-
if (evsel__is_group_event(evsel)) {
evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
@@ -1217,7 +1266,10 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
if (verbose > 0)
symbol__annotate_hits(sym, evsel);
- addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+ apd.addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source,
+ apd.start);
+ evsel__get_arch(evsel, &apd.arch);
+ apd.dbg = debuginfo__new(filename);
list_for_each_entry(pos, &notes->src->source, node) {
int err;
@@ -1227,9 +1279,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
queue_len = 0;
}
- err = annotation_line__print(pos, sym, start, evsel, len,
- opts->min_pcnt, printed, opts->max_lines,
- queue, addr_fmt_width, opts->percent_type);
+ err = annotation_line__print(pos, &apd, opts, printed, queue);
switch (err) {
case 0:
@@ -1260,6 +1310,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
}
}
+ debuginfo__delete(apd.dbg);
free(filename);
return more;
@@ -1597,8 +1648,9 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root)
annotation__calc_lines(notes, ms, root);
}
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct dso *dso = map__dso(ms->map);
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
@@ -1632,8 +1684,9 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
return 0;
}
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct dso *dso = map__dso(ms->map);
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
@@ -1657,7 +1710,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
print_summary(&source_line, dso__long_name(dso));
}
- symbol__annotate_printf(ms, evsel);
+ hist_entry__annotate_printf(he, evsel);
annotated_source__purge(symbol__annotation(sym)->src);
@@ -2643,6 +2696,92 @@ void debuginfo_cache__delete(void)
di_cache.dbg = NULL;
}
+static struct annotated_data_type *
+__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
+ struct debuginfo *dbg, struct disasm_line *dl,
+ int *type_offset)
+{
+ struct map_symbol *ms = &he->ms;
+ struct annotated_insn_loc loc;
+ struct annotated_op_loc *op_loc;
+ struct annotated_data_type *mem_type;
+ struct annotated_item_stat *istat;
+ int i;
+
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat == NULL) {
+ ann_data_stat.no_insn++;
+ return NO_TYPE;
+ }
+
+ if (annotate_get_insn_location(arch, dl, &loc) < 0) {
+ ann_data_stat.no_insn_ops++;
+ istat->bad++;
+ return NO_TYPE;
+ }
+
+ if (is_stack_operation(arch, dl)) {
+ istat->good++;
+ *type_offset = 0;
+ return &stackop_type;
+ }
+
+ for_each_insn_op_loc(&loc, i, op_loc) {
+ struct data_loc_info dloc = {
+ .arch = arch,
+ .thread = he->thread,
+ .ms = ms,
+ .ip = ms->sym->start + dl->al.offset,
+ .cpumode = he->cpumode,
+ .op = op_loc,
+ .di = dbg,
+ };
+
+ if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
+ continue;
+
+ /* PC-relative addressing */
+ if (op_loc->reg1 == DWARF_REG_PC) {
+ dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
+ op_loc->offset, dl);
+ }
+
+ /* This CPU access in kernel - pretend PC-relative addressing */
+ if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") &&
+ op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
+ dloc.var_addr = op_loc->offset;
+ op_loc->reg1 = DWARF_REG_PC;
+ }
+
+ mem_type = find_data_type(&dloc);
+
+ if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
+ istat->good++;
+ *type_offset = 0;
+ return &canary_type;
+ }
+
+ if (mem_type)
+ istat->good++;
+ else
+ istat->bad++;
+
+ if (symbol_conf.annotate_data_sample) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ annotated_data_type__update_samples(mem_type, evsel,
+ dloc.type_offset,
+ he->stat.nr_events,
+ he->stat.period);
+ }
+ *type_offset = dloc.type_offset;
+ return mem_type ?: NO_TYPE;
+ }
+
+ /* retry with a fused instruction */
+ return NULL;
+}
+
/**
* hist_entry__get_data_type - find data type for given hist entry
* @he: hist entry
@@ -2658,12 +2797,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
struct evsel *evsel = hists_to_evsel(he->hists);
struct arch *arch;
struct disasm_line *dl;
- struct annotated_insn_loc loc;
- struct annotated_op_loc *op_loc;
struct annotated_data_type *mem_type;
struct annotated_item_stat *istat;
u64 ip = he->ip;
- int i;
ann_data_stat.total++;
@@ -2715,77 +2851,10 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
}
retry:
- istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
- if (istat == NULL) {
- ann_data_stat.no_insn++;
- return NULL;
- }
-
- if (annotate_get_insn_location(arch, dl, &loc) < 0) {
- ann_data_stat.no_insn_ops++;
- istat->bad++;
- return NULL;
- }
-
- if (is_stack_operation(arch, dl)) {
- istat->good++;
- he->mem_type_off = 0;
- return &stackop_type;
- }
-
- for_each_insn_op_loc(&loc, i, op_loc) {
- struct data_loc_info dloc = {
- .arch = arch,
- .thread = he->thread,
- .ms = ms,
- /* Recalculate IP for LOCK prefix or insn fusion */
- .ip = ms->sym->start + dl->al.offset,
- .cpumode = he->cpumode,
- .op = op_loc,
- .di = di_cache.dbg,
- };
-
- if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
- continue;
-
- /* Recalculate IP because of LOCK prefix or insn fusion */
- ip = ms->sym->start + dl->al.offset;
-
- /* PC-relative addressing */
- if (op_loc->reg1 == DWARF_REG_PC) {
- dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
- op_loc->offset, dl);
- }
-
- /* This CPU access in kernel - pretend PC-relative addressing */
- if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") &&
- op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
- dloc.var_addr = op_loc->offset;
- op_loc->reg1 = DWARF_REG_PC;
- }
-
- mem_type = find_data_type(&dloc);
-
- if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
- istat->good++;
- he->mem_type_off = 0;
- return &canary_type;
- }
-
- if (mem_type)
- istat->good++;
- else
- istat->bad++;
-
- if (symbol_conf.annotate_data_sample) {
- annotated_data_type__update_samples(mem_type, evsel,
- dloc.type_offset,
- he->stat.nr_events,
- he->stat.period);
- }
- he->mem_type_off = dloc.type_offset;
- return mem_type;
- }
+ mem_type = __hist_entry__get_data_type(he, arch, di_cache.dbg, dl,
+ &he->mem_type_off);
+ if (mem_type)
+ return mem_type == NO_TYPE ? NULL : mem_type;
/*
* Some instructions can be fused and the actual memory access came
@@ -2805,7 +2874,9 @@ retry:
}
ann_data_stat.no_mem_ops++;
- istat->bad++;
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat)
+ istat->bad++;
return NULL;
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 98db1b88daf4..0e6e3f60a897 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -55,9 +55,11 @@ struct annotation_options {
show_asm_raw,
show_br_cntr,
annotate_src,
+ code_with_type,
full_addr;
u8 offset_level;
u8 disassemblers[MAX_DISASSEMBLERS];
+ u8 disassembler_used;
int min_pcnt;
int max_lines;
int context;
@@ -455,7 +457,6 @@ enum symbol_disassemble_errno {
int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel);
void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel);
void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel);
void annotated_source__purge(struct annotated_source *as);
@@ -464,9 +465,9 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel);
bool ui__has_annotation(void);
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel);
-
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel);
+int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel);
+int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel);
+int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel);
#ifdef HAVE_SLANG_SUPPORT
int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
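
The refactoring above splits hist_entry__get_data_type() so the inner helper can signal three distinct outcomes: a found type, NO_TYPE for a definitive miss (translated back to NULL for callers), and NULL meaning no memory operand matched yet, in which case the caller retries with a fused instruction. A minimal standalone sketch of that sentinel convention (toy types, not the perf API):

#include <stdio.h>

struct dtype { const char *name; };

/* Sentinel mirroring the NO_TYPE define added in annotate.c. */
#define NO_TYPE ((struct dtype *)-1UL)

static struct dtype *toy_lookup(int key)
{
	static struct dtype t = { "struct task_struct" };

	if (key == 0)
		return &t;	/* found a type */
	if (key == 1)
		return NO_TYPE;	/* definitive miss: caller reports NULL */
	return NULL;		/* no memory operand: caller may retry */
}

int main(void)
{
	for (int key = 0; key < 3; key++) {
		struct dtype *t = toy_lookup(key);

		if (t == NULL)
			printf("key %d: retry with fused insn\n", key);
		else if (t == NO_TYPE)
			printf("key %d: no type\n", key);
		else
			printf("key %d: %s\n", key, t->name);
	}
	return 0;
}
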
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index ba807071d3c1..688fe6d75244 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -28,7 +28,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
/* Instruction virtual address or Branch target address */
if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
- index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH ||
+ index == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) {
ns = SPE_ADDR_PKT_GET_NS(payload);
el = SPE_ADDR_PKT_GET_EL(payload);
@@ -181,6 +182,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.virt_addr = ip;
else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
decoder->record.phys_addr = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH)
+ decoder->record.prev_br_tgt = ip;
break;
case ARM_SPE_COUNTER:
if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
@@ -207,6 +210,18 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
+ if (payload & SPE_OP_PKT_COND)
+ decoder->record.op |= ARM_SPE_OP_BR_COND;
+ if (payload & SPE_OP_PKT_INDIRECT_BRANCH)
+ decoder->record.op |= ARM_SPE_OP_BR_INDIRECT;
+ if (payload & SPE_OP_PKT_GCS)
+ decoder->record.op |= ARM_SPE_OP_BR_GCS;
+ if (SPE_OP_PKT_CR_BL(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_BL;
+ if (SPE_OP_PKT_CR_RET(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_RET;
+ if (SPE_OP_PKT_CR_NON_BL_RET(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_NON_BL_RET;
break;
default:
pr_err("Get packet error!\n");
@@ -238,6 +253,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
if (payload & BIT(EV_MISPRED))
decoder->record.type |= ARM_SPE_BRANCH_MISS;
+ if (payload & BIT(EV_NOT_TAKEN))
+ decoder->record.type |= ARM_SPE_BRANCH_NOT_TAKEN;
+
+ if (payload & BIT(EV_TRANSACTIONAL))
+ decoder->record.type |= ARM_SPE_IN_TXN;
+
if (payload & BIT(EV_PARTIAL_PREDICATE))
decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 4bcd627e859f..5d232188643b 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -24,6 +24,8 @@ enum arm_spe_sample_type {
ARM_SPE_REMOTE_ACCESS = 1 << 7,
ARM_SPE_SVE_PARTIAL_PRED = 1 << 8,
ARM_SPE_SVE_EMPTY_PRED = 1 << 9,
+ ARM_SPE_BRANCH_NOT_TAKEN = 1 << 10,
+ ARM_SPE_IN_TXN = 1 << 11,
};
enum arm_spe_op_type {
@@ -52,8 +54,12 @@ enum arm_spe_op_type {
ARM_SPE_OP_SVE_SG = 1 << 27,
/* Second level operation type for BRANCH_ERET */
- ARM_SPE_OP_BR_COND = 1 << 16,
- ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+ ARM_SPE_OP_BR_COND = 1 << 16,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+ ARM_SPE_OP_BR_GCS = 1 << 18,
+ ARM_SPE_OP_BR_CR_BL = 1 << 19,
+ ARM_SPE_OP_BR_CR_RET = 1 << 20,
+ ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
};
enum arm_spe_common_data_source {
@@ -83,6 +89,7 @@ struct arm_spe_record {
u32 latency;
u64 from_ip;
u64 to_ip;
+ u64 prev_br_tgt;
u64 timestamp;
u64 virt_addr;
u64 phys_addr;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 4cef10a83962..13cadb2f1cea 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -308,6 +308,8 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
if (payload & BIT(EV_ALIGNMENT))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+ if (payload & BIT(EV_TRANSACTIONAL))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " TXN");
if (payload & BIT(EV_PARTIAL_PREDICATE))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
if (payload & BIT(EV_EMPTY_PREDICATE))
@@ -397,10 +399,16 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
if (payload & SPE_OP_PKT_COND)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
-
- if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
+ if (payload & SPE_OP_PKT_INDIRECT_BRANCH)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
-
+ if (payload & SPE_OP_PKT_GCS)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS");
+ if (SPE_OP_PKT_CR_BL(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-BL");
+ if (SPE_OP_PKT_CR_RET(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-RET");
+ if (SPE_OP_PKT_CR_NON_BL_RET(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-NON-BL-RET");
break;
default:
/* Unknown index */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 464a912b221c..2cdf9f6da268 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -7,6 +7,7 @@
#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+#include <linux/bitfield.h>
#include <stddef.h>
#include <stdint.h>
@@ -104,6 +105,7 @@ enum arm_spe_events {
EV_LLC_MISS = 9,
EV_REMOTE_ACCESS = 10,
EV_ALIGNMENT = 11,
+ EV_TRANSACTIONAL = 16,
EV_PARTIAL_PREDICATE = 17,
EV_EMPTY_PREDICATE = 18,
};
@@ -116,8 +118,6 @@ enum arm_spe_events {
#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
-#define SPE_OP_PKT_COND BIT(0)
-
#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
@@ -148,7 +148,13 @@ enum arm_spe_events {
#define SPE_OP_PKT_SVE_PRED BIT(2)
#define SPE_OP_PKT_SVE_FP BIT(1)
-#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2)
+#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3)
+#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1)
+#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2)
+#define SPE_OP_PKT_CR_NON_BL_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 3)
+#define SPE_OP_PKT_GCS BIT(2)
+#define SPE_OP_PKT_INDIRECT_BRANCH BIT(1)
+#define SPE_OP_PKT_COND BIT(0)
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
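
The CR (call/return) subclass of the branch operation payload is now decoded from bits [4:3] with FIELD_GET: field value 1 is a branch-and-link (CR-BL), 2 a return (CR-RET), and 3 a non-BL/RET change of flow. A small demo of the decoding with hand-rolled GENMASK/FIELD_GET equivalents (assumption: userspace stand-ins, not the kernel's linux/bitfield.h):

#include <stdint.h>
#include <stdio.h>

/* Hand-rolled equivalents of the kernel macros for this demo. */
#define GENMASK_ULL(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
#define SPE_OP_PKT_CR_MASK	GENMASK_ULL(4, 3)
#define FIELD_GET(mask, v)	(((v) & (mask)) >> __builtin_ctzll(mask))

int main(void)
{
	/* payload with bits [4:3] = 0b01 -> branch-and-link (CR-BL) */
	uint64_t payload = 1ULL << 3;

	printf("CR field = %llu\n",	/* prints 1 */
	       (unsigned long long)FIELD_GET(SPE_OP_PKT_CR_MASK, payload));
	return 0;
}
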
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 12761c39788f..2a9775649cc2 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -37,6 +37,8 @@
#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)
+#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -101,6 +103,7 @@ struct arm_spe_queue {
struct thread *thread;
u64 period_instructions;
u32 flags;
+ struct branch_stack *last_branch;
};
struct data_source_handle {
@@ -231,6 +234,17 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
params.get_trace = arm_spe_get_trace;
params.data = speq;
+ if (spe->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ /* Allocate up to two entries for PBT + TGT */
+ sz += sizeof(struct branch_entry) *
+ min(spe->synth_opts.last_branch_sz, 2U);
+ speq->last_branch = zalloc(sz);
+ if (!speq->last_branch)
+ goto out_free;
+ }
+
/* create new decoder */
speq->decoder = arm_spe_decoder_new(&params);
if (!speq->decoder)
@@ -240,6 +254,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
out_free:
zfree(&speq->event_buf);
+ zfree(&speq->last_branch);
free(speq);
return NULL;
@@ -346,6 +361,88 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
event->sample.header.size = sizeof(struct perf_event_header);
}
+static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ struct branch_stack *bstack = speq->last_branch;
+ struct branch_flags *bs_flags;
+ unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
+ bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
+ bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
+ size_t sz = sizeof(struct branch_stack) +
+ sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
+ int i = 0;
+
+ /* Clean up branch stack */
+ memset(bstack, 0x0, sz);
+
+ if (!have_tgt && !have_pbt)
+ return;
+
+ if (have_tgt) {
+ bstack->entries[i].from = record->from_ip;
+ bstack->entries[i].to = record->to_ip;
+
+ bs_flags = &bstack->entries[i].flags;
+ bs_flags->value = 0;
+
+ if (record->op & ARM_SPE_OP_BR_CR_BL) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND_CALL;
+ else
+ bs_flags->type |= PERF_BR_CALL;
+ /*
+ * Indirect branch instruction without link (e.g. BR),
+			 * treat this case as a function return.
+ */
+ } else if (record->op & ARM_SPE_OP_BR_CR_RET ||
+ record->op & ARM_SPE_OP_BR_INDIRECT) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND_RET;
+ else
+ bs_flags->type |= PERF_BR_RET;
+ } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND;
+ else
+ bs_flags->type |= PERF_BR_UNCOND;
+ } else {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND;
+ else
+ bs_flags->type |= PERF_BR_UNKNOWN;
+ }
+
+ if (record->type & ARM_SPE_BRANCH_MISS) {
+ bs_flags->mispred = 1;
+ bs_flags->predicted = 0;
+ } else {
+ bs_flags->mispred = 0;
+ bs_flags->predicted = 1;
+ }
+
+ if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
+ bs_flags->not_taken = 1;
+
+ if (record->type & ARM_SPE_IN_TXN)
+ bs_flags->in_tx = 1;
+
+ bs_flags->cycles = min(record->latency, 0xFFFFU);
+ i++;
+ }
+
+ if (have_pbt) {
+ bs_flags = &bstack->entries[i].flags;
+ bs_flags->type |= PERF_BR_UNKNOWN;
+ bstack->entries[i].to = record->prev_br_tgt;
+ i++;
+ }
+
+ bstack->nr = i;
+ bstack->hw_idx = -1ULL;
+}
+
static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
event->header.size = perf_event__sample_event_size(sample, type, 0);
@@ -379,8 +476,10 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -390,7 +489,9 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.data_src = data_src;
sample.weight = record->latency;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
@@ -399,8 +500,10 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -408,8 +511,11 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.addr = record->to_ip;
sample.weight = record->latency;
sample.flags = speq->flags;
+ sample.branch_stack = speq->last_branch;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
@@ -418,7 +524,8 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
/*
* Handles perf instruction sampling period.
@@ -428,6 +535,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
return 0;
speq->period_instructions = 0;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -438,8 +546,11 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
sample.period = spe->instructions_sample_period;
sample.weight = record->latency;
sample.flags = speq->flags;
+ sample.branch_stack = speq->last_branch;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static const struct midr_range common_ds_encoding_cpus[] = {
@@ -470,6 +581,26 @@ static void arm_spe__sample_flags(struct arm_spe_queue *speq)
if (record->type & ARM_SPE_BRANCH_MISS)
speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
+
+ if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
+ speq->flags |= PERF_IP_FLAG_NOT_TAKEN;
+
+ if (record->type & ARM_SPE_IN_TXN)
+ speq->flags |= PERF_IP_FLAG_IN_TX;
+
+ if (record->op & ARM_SPE_OP_BR_COND)
+ speq->flags |= PERF_IP_FLAG_CONDITIONAL;
+
+ if (record->op & ARM_SPE_OP_BR_CR_BL)
+ speq->flags |= PERF_IP_FLAG_CALL;
+ else if (record->op & ARM_SPE_OP_BR_CR_RET)
+ speq->flags |= PERF_IP_FLAG_RETURN;
+ /*
+ * Indirect branch instruction without link (e.g. BR),
+	 * treat it as a function return.
+ */
+ else if (record->op & ARM_SPE_OP_BR_INDIRECT)
+ speq->flags |= PERF_IP_FLAG_RETURN;
}
}
@@ -669,6 +800,10 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
+ /* Only synthesize data source for LDST operations */
+ if (!is_ldst_op(record->op))
+ return 0;
+
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
else if (record->op & ARM_SPE_OP_ST)
@@ -749,6 +884,10 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
}
}
+ if (spe->synth_opts.last_branch &&
+ (spe->sample_branch || spe->sample_instructions))
+ arm_spe__prep_branch_stack(speq);
+
if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
err = arm_spe__synth_branch_sample(speq, spe->branch_id);
if (err)
@@ -767,7 +906,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
* When data_src is zero it means the record is not a memory operation,
* skip to synthesize memory sample for this case.
*/
- if (spe->sample_memory && data_src) {
+ if (spe->sample_memory && is_ldst_op(record->op)) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
@@ -1240,6 +1379,7 @@ static void arm_spe_free_queue(void *priv)
thread__zput(speq->thread);
arm_spe_decoder_free(speq->decoder);
zfree(&speq->event_buf);
+ zfree(&speq->last_branch);
free(speq);
}
@@ -1459,6 +1599,19 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
}
+ if (spe->synth_opts.last_branch) {
+ if (spe->synth_opts.last_branch_sz > 2)
+ pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");
+
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+
if (spe->synth_opts.branches) {
spe->sample_branch = true;
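
The last_branch buffer above is a flexible-array struct sized for at most two entries, since SPE can only ever supply the current branch target (TGT) and the previous branch target (PBT). A minimal sketch of the sizing, with simplified stand-ins for the perf types (assumption: the real definitions live in perf's headers):

#include <stdio.h>
#include <stdlib.h>

struct branch_entry { unsigned long long from, to, flags; };
struct branch_stack {
	unsigned long long nr, hw_idx;
	struct branch_entry entries[];
};

/* Room for at most two entries (PBT + TGT), matching the allocation and
 * memset sizes used in arm-spe.c. */
static struct branch_stack *alloc_bstack(unsigned int last_branch_sz)
{
	unsigned int n = last_branch_sz < 2 ? last_branch_sz : 2;

	return calloc(1, sizeof(struct branch_stack) +
			 n * sizeof(struct branch_entry));
}

int main(void)
{
	struct branch_stack *bs = alloc_bstack(64); /* asked for 64, get 2 */

	bs->nr = 0;
	bs->hw_idx = -1ULL;	/* no hardware index, as in the patch */
	printf("nr=%llu\n", bs->nr);
	free(bs);
	return 0;
}
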
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
index 4940be4a0569..958afe8b821e 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.c
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -4,6 +4,7 @@
#include "event.h"
#include "perf_regs.h" // SMPL_REG_MASK
#include "unwind.h"
+#include <string.h>
#define perf_event_arm_regs perf_event_arm64_regs
#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
@@ -16,8 +17,13 @@ struct entries {
static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
{
- return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
- && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+ struct regs_dump *regs;
+
+ if (callchain_param.record_mode != CALLCHAIN_FP)
+ return false;
+
+ regs = perf_sample__user_regs(sample);
+ return regs->regs && regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
}
static int add_entry(struct unwind_entry *entry, void *arg)
@@ -32,7 +38,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
{
int ret;
struct entries entries = {};
- struct regs_dump old_regs = sample->user_regs;
+ struct regs_dump old_regs, *regs;
if (!get_leaf_frame_caller_enabled(sample))
return 0;
@@ -42,19 +48,20 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
* and set its mask. SP is not used when doing the unwinding but it
* still needs to be set to prevent failures.
*/
-
- if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
- sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
- sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ regs = perf_sample__user_regs(sample);
+ memcpy(&old_regs, regs, sizeof(*regs));
+ if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ regs->cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
}
- if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
- sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
- sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ regs->cache_regs[PERF_REG_ARM64_SP] = 0;
}
ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
- sample->user_regs = old_regs;
+ memcpy(regs, &old_regs, sizeof(*regs));
if (ret || entries.length != 2)
return ret;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 4d1633d87eff..03211c2623de 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1173,16 +1173,19 @@ static int auxtrace_queue_data_cb(struct perf_session *session,
if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
return 0;
+ perf_sample__init(&sample, /*all=*/false);
err = evlist__parse_sample(session->evlist, event, &sample);
if (err)
- return err;
-
- if (!sample.aux_sample.size)
- return 0;
+ goto out;
- offset += sample.aux_sample.data - (void *)event;
+ if (sample.aux_sample.size) {
+ offset += sample.aux_sample.data - (void *)event;
- return session->auxtrace->queue_data(session, &sample, NULL, offset);
+ err = session->auxtrace->queue_data(session, &sample, NULL, offset);
+ }
+out:
+ perf_sample__exit(&sample);
+ return err;
}
int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index f313404f95a9..6aa65ade3385 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -76,7 +76,7 @@ static int path_or_error(void)
num_dec [0-9]+
num_hex 0[Xx][0-9a-fA-F]+
space [ \t]+
-path [^ \t\n]+
+path [^ \t\n,]+
ident [_a-zA-Z][_a-zA-Z0-9]+
%%
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 25fc280e414a..7324668cc83e 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -39,6 +39,10 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
skel->rodata->bucket_range = ftrace->bucket_range;
skel->rodata->min_latency = ftrace->min_latency;
+ skel->rodata->bucket_num = ftrace->bucket_num;
+ if (ftrace->bucket_range && ftrace->bucket_num) {
+ bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
+ }
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
@@ -124,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
return 0;
}
-int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
int buckets[], struct stats *stats)
{
int i, fd, err;
@@ -138,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
if (hist == NULL)
return -ENOMEM;
- for (idx = 0; idx < NUM_BUCKET; idx++) {
+ for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
err = bpf_map_lookup_elem(fd, &idx, hist);
if (err) {
buckets[idx] = 0;
@@ -154,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
stats->n = skel->bss->count;
stats->max = skel->bss->max;
stats->min = skel->bss->min;
+
+ if (!ftrace->use_nsec) {
+ stats->mean /= 1000;
+ stats->max /= 1000;
+ stats->min /= 1000;
+ }
}
free(hist);
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index fc8666222399..5af8f6d1bc95 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -131,10 +131,20 @@ int lock_contention_prepare(struct lock_contention *con)
else
bpf_map__set_max_entries(skel->maps.task_data, 1);
- if (con->save_callstack)
+ if (con->save_callstack) {
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
- else
+ if (con->owner) {
+ bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
+ bpf_map__set_key_size(skel->maps.owner_stacks,
+ con->max_stack * sizeof(u64));
+ bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
+ skel->rodata->max_stack = con->max_stack;
+ }
+ } else {
bpf_map__set_max_entries(skel->maps.stacks, 1);
+ }
if (target__has_cpu(target)) {
skel->rodata->has_cpu = 1;
@@ -450,7 +460,6 @@ static const char *lock_contention_get_name(struct lock_contention *con,
{
int idx = 0;
u64 addr;
- const char *name = "";
static char name_buf[KSYM_NAME_LEN];
struct symbol *sym;
struct map *kmap;
@@ -465,13 +474,14 @@ static const char *lock_contention_get_name(struct lock_contention *con,
if (pid) {
struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);
- if (t == NULL)
- return name;
- if (!bpf_map_lookup_elem(task_fd, &pid, &task) &&
- thread__set_comm(t, task.comm, /*timestamp=*/0))
- name = task.comm;
+ if (t != NULL &&
+ !bpf_map_lookup_elem(task_fd, &pid, &task) &&
+ thread__set_comm(t, task.comm, /*timestamp=*/0)) {
+ snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
+ return name_buf;
+ }
}
- return name;
+ return "";
}
if (con->aggr_mode == LOCK_AGGR_ADDR) {
@@ -539,6 +549,63 @@ static const char *lock_contention_get_name(struct lock_contention *con,
return name_buf;
}
+struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
+{
+ int stacks_fd, stat_fd;
+ u64 *stack_trace = NULL;
+ s32 stack_id;
+ struct contention_key ckey = {};
+ struct contention_data cdata = {};
+ size_t stack_size = con->max_stack * sizeof(*stack_trace);
+ struct lock_stat *st = NULL;
+
+ stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
+ stat_fd = bpf_map__fd(skel->maps.owner_stat);
+ if (!stacks_fd || !stat_fd)
+ goto out_err;
+
+ stack_trace = zalloc(stack_size);
+ if (stack_trace == NULL)
+ goto out_err;
+
+ if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
+ goto out_err;
+
+ bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
+ ckey.stack_id = stack_id;
+ bpf_map_lookup_elem(stat_fd, &ckey, &cdata);
+
+ st = zalloc(sizeof(struct lock_stat));
+ if (!st)
+ goto out_err;
+
+ st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
+ "unknown");
+ if (!st->name)
+ goto out_err;
+
+ st->flags = cdata.flags;
+ st->nr_contended = cdata.count;
+ st->wait_time_total = cdata.total_time;
+ st->wait_time_max = cdata.max_time;
+ st->wait_time_min = cdata.min_time;
+ st->callstack = stack_trace;
+
+ if (cdata.count)
+ st->avg_wait_time = cdata.total_time / cdata.count;
+
+ bpf_map_delete_elem(stacks_fd, stack_trace);
+ bpf_map_delete_elem(stat_fd, &ckey);
+
+ return st;
+
+out_err:
+ free(stack_trace);
+ free(st);
+
+ return NULL;
+}
+
int lock_contention_read(struct lock_contention *con)
{
int fd, stack, err = 0;
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index fb144811b34f..e731a79a753a 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -50,6 +50,7 @@ const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;
const volatile unsigned int min_latency;
const volatile unsigned int max_latency;
+const volatile unsigned int bucket_num = NUM_BUCKET;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -101,6 +102,7 @@ int BPF_PROG(func_end)
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
__s64 delta = bpf_ktime_get_ns() - *start;
+ __u64 val = delta;
__u32 key = 0;
__u64 *hist;
@@ -110,30 +112,27 @@ int BPF_PROG(func_end)
return 0;
if (bucket_range != 0) {
- delta /= cmp_base;
+ val = delta / cmp_base;
if (min_latency > 0) {
- if (delta > min_latency)
- delta -= min_latency;
+ if (val > min_latency)
+ val -= min_latency;
else
goto do_lookup;
}
// Less than 1 unit (ms or ns), or, in the future,
// than the min latency desired.
- if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
- // clang 12 doesn't like s64 / u32 division
- key = (__u64)delta / bucket_range + 1;
- if (key >= NUM_BUCKET ||
- delta >= max_latency - min_latency)
- key = NUM_BUCKET - 1;
+ if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+ key = val / bucket_range + 1;
+ if (key >= bucket_num)
+ key = bucket_num - 1;
}
- delta += min_latency;
goto do_lookup;
}
// calculate index using delta
- for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ for (key = 0; key < (bucket_num - 1); key++) {
if (delta < (cmp_base << key))
break;
}
@@ -143,12 +142,9 @@ do_lookup:
if (!hist)
return 0;
- *hist += 1;
+ __sync_fetch_and_add(hist, 1);
- if (bucket_range == 0)
- delta /= cmp_base;
-
- __sync_fetch_and_add(&total, delta);
+ __sync_fetch_and_add(&total, delta); // always in nsec
__sync_fetch_and_add(&count, 1);
if (delta > max)
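
With bucket_range set, the histogram key above is computed as val / bucket_range + 1 after subtracting min_latency, clamped to bucket_num - 1; anything at or below the minimum stays in bucket 0. A userspace re-statement of the bucket math with assumed parameters:

#include <stdio.h>

static unsigned int bucket_key(unsigned long long val_units,
			       unsigned int min_latency,
			       unsigned int bucket_range,
			       unsigned int bucket_num)
{
	unsigned int key = 0;

	if (min_latency > 0) {
		if (val_units <= min_latency)
			return 0;	/* below the minimum: first bucket */
		val_units -= min_latency;
	}
	if (val_units > 0) {	/* 1st entry: [ 1 unit .. bucket_range units ) */
		key = val_units / bucket_range + 1;
		if (key >= bucket_num)
			key = bucket_num - 1;
	}
	return key;
}

int main(void)
{
	/* 350 units, min 100, range 50, 8 buckets -> (350-100)/50+1 = 6 */
	printf("key = %u\n", bucket_key(350, 100, 50, 8));
	return 0;
}
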
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
index cbd79bc4b330..9ce9c8dddc4b 100644
--- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -80,7 +80,7 @@ static __always_inline int local_strncmp(const char *s1,
for (i = 0; i < sz; i++) {
ret = (unsigned char)s1[i] - (unsigned char)s2[i];
- if (ret || !s1[i] || !s2[i])
+ if (ret || !s1[i])
break;
}
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 6533ea9b044c..69be7a4234e0 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -27,6 +27,38 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");
+/* buffer for owner stacktrace */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 1);
+} stack_buf SEC(".maps");
+
+/* a map for tracing owner stacktrace to owner stack id */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64)); // owner stacktrace
+ __uint(value_size, sizeof(__s32)); // owner stack id
+ __uint(max_entries, 1);
+} owner_stacks SEC(".maps");
+
+/* a map for tracing lock address to owner data */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64)); // lock address
+ __uint(value_size, sizeof(struct owner_tracing_data));
+ __uint(max_entries, 1);
+} owner_data SEC(".maps");
+
+/* a map for contention_key (stores owner stack id) to contention data */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct contention_key));
+ __uint(value_size, sizeof(struct contention_data));
+ __uint(max_entries, 1);
+} owner_stat SEC(".maps");
+
/* maintain timestamp at the beginning of contention */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
@@ -143,6 +175,7 @@ const volatile int needs_callstack;
const volatile int stack_skip;
const volatile int lock_owner;
const volatile int use_cgroup_v2;
+const volatile int max_stack;
/* determine the key of lock stat */
const volatile int aggr_mode;
@@ -164,6 +197,9 @@ int data_fail;
int task_map_full;
int data_map_full;
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak;
+void bpf_task_release(struct task_struct *p) __ksym __weak;
+
static inline __u64 get_current_cgroup_id(void)
{
struct task_struct *task;
@@ -387,6 +423,61 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
return pelem;
}
+static inline s32 get_owner_stack_id(u64 *stacktrace)
+{
+ s32 *id, new_id;
+ static s64 id_gen = 1;
+
+ id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+ if (id)
+ return *id;
+
+ new_id = (s32)__sync_fetch_and_add(&id_gen, 1);
+
+ bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST);
+
+ id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+ if (id)
+ return *id;
+
+ return -1;
+}
+
+static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
+{
+ __sync_fetch_and_add(&data->total_time, duration);
+ __sync_fetch_and_add(&data->count, count);
+
+ /* FIXME: need atomic operations */
+ if (data->max_time < duration)
+ data->max_time = duration;
+ if (data->min_time > duration)
+ data->min_time = duration;
+}
+
+static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
+{
+ struct contention_key key = {
+ .stack_id = id,
+ .pid = 0,
+ .lock_addr_or_cgroup = 0,
+ };
+ struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key);
+
+ if (!data) {
+ struct contention_data first = {
+ .total_time = duration,
+ .max_time = duration,
+ .min_time = duration,
+ .count = 1,
+ .flags = flags,
+ };
+ bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST);
+ } else {
+ update_contention_data(data, duration, 1);
+ }
+}
+
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
@@ -404,6 +495,72 @@ int contention_begin(u64 *ctx)
pelem->flags = (__u32)ctx[1];
if (needs_callstack) {
+ u32 i = 0;
+ u32 id = 0;
+ int owner_pid;
+ u64 *buf;
+ struct task_struct *task;
+ struct owner_tracing_data *otdata;
+
+ if (!lock_owner)
+ goto skip_owner;
+
+ task = get_lock_owner(pelem->lock, pelem->flags);
+ if (!task)
+ goto skip_owner;
+
+ owner_pid = BPF_CORE_READ(task, pid);
+
+ buf = bpf_map_lookup_elem(&stack_buf, &i);
+ if (!buf)
+ goto skip_owner;
+ for (i = 0; i < max_stack; i++)
+ buf[i] = 0x0;
+
+ if (!bpf_task_from_pid)
+ goto skip_owner;
+
+ task = bpf_task_from_pid(owner_pid);
+ if (!task)
+ goto skip_owner;
+
+ bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
+ bpf_task_release(task);
+
+ otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+ id = get_owner_stack_id(buf);
+
+ /*
+		 * Contention has just begun, or, as a corner case, `lock` is owned by a
+		 * process other than `owner_pid`. We treat the corner case as an unexpected
+		 * internal error and simply ignore the previous tracing record.
+ */
+ if (!otdata || otdata->pid != owner_pid) {
+ struct owner_tracing_data first = {
+ .pid = owner_pid,
+ .timestamp = pelem->timestamp,
+ .count = 1,
+ .stack_id = id,
+ };
+ bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
+ }
+ /* Contention is ongoing and new waiter joins */
+ else {
+ __sync_fetch_and_add(&otdata->count, 1);
+
+ /*
+ * The owner is the same, but stacktrace might be changed. In this case we
+ * store/update `owner_stat` based on current owner stack id.
+ */
+ if (id != otdata->stack_id) {
+ update_owner_stat(id, pelem->timestamp - otdata->timestamp,
+ pelem->flags);
+
+ otdata->timestamp = pelem->timestamp;
+ otdata->stack_id = id;
+ }
+ }
+skip_owner:
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
@@ -440,6 +597,7 @@ int contention_end(u64 *ctx)
struct tstamp_data *pelem;
struct contention_key key = {};
struct contention_data *data;
+ __u64 timestamp;
__u64 duration;
bool need_delete = false;
@@ -467,12 +625,88 @@ int contention_end(u64 *ctx)
need_delete = true;
}
- duration = bpf_ktime_get_ns() - pelem->timestamp;
+ timestamp = bpf_ktime_get_ns();
+ duration = timestamp - pelem->timestamp;
if ((__s64)duration < 0) {
__sync_fetch_and_add(&time_fail, 1);
goto out;
}
+ if (needs_callstack && lock_owner) {
+ struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+
+ if (!otdata)
+ goto skip_owner;
+
+ /* Update `owner_stat` */
+ update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags);
+
+ /* No contention is occurring, delete `lock` entry in `owner_data` */
+ if (otdata->count <= 1)
+ bpf_map_delete_elem(&owner_data, &pelem->lock);
+ /*
+ * Contention is still ongoing, with a new owner (current task). `owner_data`
+ * should be updated accordingly.
+ */
+ else {
+ u32 i = 0;
+ s32 ret = (s32)ctx[1];
+ u64 *buf;
+
+ otdata->timestamp = timestamp;
+ __sync_fetch_and_add(&otdata->count, -1);
+
+ buf = bpf_map_lookup_elem(&stack_buf, &i);
+ if (!buf)
+ goto skip_owner;
+ for (i = 0; i < (u32)max_stack; i++)
+ buf[i] = 0x0;
+
+	/*
+	 * `ret` has the return code of the lock function.
+	 * If `ret` is negative, the current task terminated its lock wait
+	 * without acquiring the lock. The owner is unchanged, but we still
+	 * need to update the owner stack.
+	 */
+ if (ret < 0) {
+ s32 id = 0;
+ struct task_struct *task;
+
+ if (!bpf_task_from_pid)
+ goto skip_owner;
+
+ task = bpf_task_from_pid(otdata->pid);
+ if (!task)
+ goto skip_owner;
+
+ bpf_get_task_stack(task, buf,
+ max_stack * sizeof(unsigned long), 0);
+ bpf_task_release(task);
+
+ id = get_owner_stack_id(buf);
+
+	/*
+	 * If the owner stack has changed, update the owner stack id for
+	 * this lock.
+	 */
+ if (id != otdata->stack_id)
+ otdata->stack_id = id;
+ }
+ /*
+ * Otherwise, update tracing data with the current task, which is the new
+ * owner.
+ */
+ else {
+ otdata->pid = pid;
+	/*
+	 * We don't want to retrieve the callstack here, since this is where
+	 * the current task acquires the lock, so it provides no additional
+	 * information. We simply assign -1 to invalidate it.
+	 */
+ otdata->stack_id = -1;
+ }
+ }
+ }
+skip_owner:
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
key.stack_id = pelem->stack_id;
@@ -556,14 +790,7 @@ int contention_end(u64 *ctx)
}
found:
- __sync_fetch_and_add(&data->total_time, duration);
- __sync_fetch_and_add(&data->count, 1);
-
- /* FIXME: need atomic operations */
- if (data->max_time < duration)
- data->max_time = duration;
- if (data->min_time > duration)
- data->min_time = duration;
+ update_contention_data(data, duration, 1);
out:
pelem->lock = 0;
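
For reference, a minimal sketch of the update_contention_data() helper that these hunks factor the removed lines into. It is inferred from those removed lines; the helper's real definition sits earlier in the patch, outside this excerpt:

static inline void update_contention_data(struct contention_data *data,
					  u64 duration, u32 count)
{
	__sync_fetch_and_add(&data->total_time, duration);
	__sync_fetch_and_add(&data->count, count);

	/* FIXME: need atomic operations */
	if (data->max_time < duration)
		data->max_time = duration;
	if (data->min_time > duration)
		data->min_time = duration;
}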
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index c15f734d7fc4..15f5743bd409 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -3,6 +3,13 @@
#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
#define UTIL_BPF_SKEL_LOCK_DATA_H
+struct owner_tracing_data {
+ u32 pid; // Who has the lock.
+ u32 count; // How many waiters for this lock.
+	u64 timestamp; // When the current owner acquired the lock while contention was ongoing.
+	s32 stack_id;  // Identifier for `owner_stat`; stored as the value in `owner_stacks`.
+};
+
struct tstamp_data {
u64 timestamp;
u64 lock;
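
Taken together with owner_tracing_data above, the owner tracking in lock_contention.bpf.c spans three BPF maps. A hedged sketch of the two visible in this excerpt (the definitions and max_entries bound are assumptions; the stack-to-id map queried by get_owner_stack_id() also exists, but its definition is outside this excerpt):

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);		/* assumed bound */
	__type(key, __u64);				/* lock address */
	__type(value, struct owner_tracing_data);
} owner_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);		/* assumed bound */
	__type(key, struct contention_key);		/* keyed by stack_id */
	__type(value, struct contention_data);
} owner_stat SEC(".maps");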
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index b80c12c74bbb..7429530fa774 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -25,7 +25,8 @@ struct branch_flags {
u64 spec:2;
u64 new_type:4;
u64 priv:3;
- u64 reserved:31;
+ u64 not_taken:1;
+ u64 reserved:30;
};
};
};
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 0c7564747a14..d7b7eef740b9 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -589,9 +589,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
return -ENOMEM;
}
call->ip = cursor_node->ip;
- call->ms = cursor_node->ms;
- call->ms.map = map__get(call->ms.map);
- call->ms.maps = maps__get(call->ms.maps);
+ map_symbol__copy(&call->ms, &cursor_node->ms);
call->srcline = cursor_node->srcline;
if (cursor_node->branch) {
@@ -1094,9 +1092,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->ip = ip;
map_symbol__exit(&node->ms);
- node->ms = *ms;
- node->ms.maps = maps__get(ms->maps);
- node->ms.map = map__get(ms->map);
+ map_symbol__copy(&node->ms, ms);
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->iter_cycles = iter_cycles;
@@ -1564,7 +1560,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
goto out;
*new = *chain;
new->has_children = false;
- new->ms.map = map__get(new->ms.map);
+ map_symbol__copy(&new->ms, &chain->ms);
list_add_tail(&new->list, &head);
}
parent = parent->parent;
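
A minimal sketch of map_symbol__copy(), inferred from the removed lines (its actual definition lives outside this diff): a plain struct copy followed by reference-count gets on both members.

static inline void map_symbol__copy(struct map_symbol *dst,
				    const struct map_symbol *src)
{
	*dst = *src;
	dst->maps = maps__get(dst->maps);
	dst->map = map__get(dst->map);
}

Note that the third hunk previously took a reference only on ms.map; routing it through the helper also pins ms.maps.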
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 9a7248dbe2d7..0319546decca 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -30,11 +30,6 @@
extern int perf_use_color_default;
-/*
- * Use this instead of perf_default_config if you need the value of color.ui.
- */
-int perf_color_default_config(const char *var, const char *value, void *cb);
-
int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
int color_vsnprintf(char *bf, size_t size, const char *color,
const char *fmt, va_list args);
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
index dc09ba7cb31e..301031ddc025 100644
--- a/tools/perf/util/color_config.c
+++ b/tools/perf/util/color_config.c
@@ -35,14 +35,3 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
}
return 0;
}
-
-int perf_color_default_config(const char *var, const char *value,
- void *cb __maybe_unused)
-{
- if (!strcmp(var, "color.ui")) {
- perf_use_color_default = perf_config_colorbool(var, value, -1);
- return 0;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 49b79cf0c5cc..8aa456d7c2cd 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -5,6 +5,8 @@
#include <internal/rc_check.h>
#include <linux/refcount.h>
#include <linux/zalloc.h>
+#include <tools/libc_compat.h> // reallocarray
+
#include "rwsem.h"
DECLARE_RC_STRUCT(comm_str) {
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index b29109cd3609..6cfecfca16f2 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -4,7 +4,9 @@
#include <stdbool.h>
#include <stddef.h>
+#include <stdio.h>
#include <sys/types.h>
+#include <linux/compiler.h>
#ifdef HAVE_ZSTD_SUPPORT
#include <zstd.h>
#endif
@@ -15,8 +17,26 @@ bool gzip_is_compressed(const char *input);
#endif
#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_stream_to_file(FILE *input, int output_fd);
int lzma_decompress_to_file(const char *input, int output_fd);
bool lzma_is_compressed(const char *input);
+#else
+static inline
+int lzma_decompress_stream_to_file(FILE *input __maybe_unused,
+ int output_fd __maybe_unused)
+{
+ return -1;
+}
+static inline
+int lzma_decompress_to_file(const char *input __maybe_unused,
+ int output_fd __maybe_unused)
+{
+ return -1;
+}
+static inline int lzma_is_compressed(const char *input __maybe_unused)
+{
+ return false;
+}
#endif
struct zstd_data {
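
With the !HAVE_LZMA_SUPPORT stubs above, callers no longer need #ifdef guards around lzma calls. A minimal sketch of the intended pattern (the helper name is hypothetical):

static int maybe_decompress_lzma(const char *path, int output_fd)
{
	if (!lzma_is_compressed(path))	/* stub returns false without LZMA */
		return -1;

	/* Stub returns -1 when LZMA support is compiled out. */
	return lzma_decompress_to_file(path, output_fd);
}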
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 2d07c9257a1a..ae72b66b6ded 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -856,12 +856,6 @@ void perf_config__exit(void)
config_set = NULL;
}
-void perf_config__refresh(void)
-{
- perf_config__exit();
- perf_config__init();
-}
-
static void perf_config_item__delete(struct perf_config_item *item)
{
zfree(&item->name);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index a727c95cb119..987b47cf54c3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -49,7 +49,6 @@ void perf_config_set__delete(struct perf_config_set *set);
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
const char *var, const char *value);
void perf_config__exit(void);
-void perf_config__refresh(void);
int perf_config__set_variable(const char *var, const char *value);
/**
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 5c329ad614e9..89570397a4b3 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m
struct perf_cpu_map *map;
map = perf_cpu_map__empty_new(data->cpus_data.nr);
- if (map) {
- unsigned i;
-
- for (i = 0; i < data->cpus_data.nr; i++) {
- /*
- * Special treatment for -1, which is not real cpu number,
- * and we need to use (int) -1 to initialize map[i],
- * otherwise it would become 65535.
- */
- if (data->cpus_data.cpu[i] == (u16) -1)
- RC_CHK_ACCESS(map)->map[i].cpu = -1;
- else
- RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
+ if (!map)
+ return NULL;
+
+ for (unsigned int i = 0; i < data->cpus_data.nr; i++) {
+		/*
+		 * Special treatment for -1, which is not a real cpu number: we
+		 * need to use (int) -1 to initialize map[i], otherwise it
+		 * would become 65535.
+		 */
+ if (data->cpus_data.cpu[i] == (u16) -1) {
+ RC_CHK_ACCESS(map)->map[i].cpu = -1;
+ } else if (data->cpus_data.cpu[i] < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i];
+ } else {
+ pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]);
+ perf_cpu_map__put(map);
+ return NULL;
}
}
@@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
int cpu;
perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
- for_each_set_bit(cpu, local_copy, 64)
- RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ for_each_set_bit(cpu, local_copy, 64) {
+ if (cpu + cpus_per_i < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
}
return map;
@@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map
RC_CHK_ACCESS(map)->map[i++].cpu = -1;
for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
- i++, cpu++)
- RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ i++, cpu++) {
+ if (cpu < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
return map;
}
@@ -427,7 +445,7 @@ static void set_max_cpu_num(void)
{
const char *mnt;
char path[PATH_MAX];
- int ret = -1;
+ int max, ret = -1;
/* set up default */
max_cpu_num.cpu = 4096;
@@ -444,10 +462,12 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num.cpu);
+ ret = get_max_num(path, &max);
if (ret)
goto out;
+ max_cpu_num.cpu = max;
+
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
if (ret >= PATH_MAX) {
@@ -455,8 +475,14 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num.cpu);
+ ret = get_max_num(path, &max);
+ if (!ret && max > INT16_MAX) {
+ pr_err("Read out of bounds max cpus of %d\n", max);
+ ret = -1;
+ }
+ if (!ret)
+ max_present_cpu_num.cpu = (int16_t)max;
out:
if (ret)
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
#define COMMA first ? "" : ","
for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
- struct perf_cpu cpu = { .cpu = INT_MAX };
+ struct perf_cpu cpu = { .cpu = INT16_MAX };
bool last = i == perf_cpu_map__nr(map);
if (!last)
@@ -696,7 +722,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
if (!online)
online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */
- return online;
+ return perf_cpu_map__get(online);
}
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
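
Since cpu_map__online() now returns a reference via perf_cpu_map__get(), every caller owns a put. A minimal sketch of the updated contract, mirroring the evlist.c change later in this patch:

	struct perf_cpu_map *online = cpu_map__online();

	if (online) {
		pr_debug("%d CPUs online\n", perf_cpu_map__nr(online));
		perf_cpu_map__put(online);	/* drop the reference the getter now returns */
	}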
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 0bf9e5c27b59..30f4bb3e7fa3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -506,20 +506,27 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
+ perf_sample__init(&sample, /*all=*/false);
err = evsel__parse_sample(evsel, event, &sample);
if (err)
- return err;
+ goto out;
cpu = sample.cpu;
if (cpu == -1) {
/* no CPU in the sample - possibly recorded with an old version of perf */
pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
- if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
- return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
+ err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ goto out;
+ }
- return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+ err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+out:
+ perf_sample__exit(&sample);
+ return err;
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -1560,8 +1567,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
union perf_event *event = tidq->event_buf;
- struct perf_sample sample = {.ip = 0,};
+ struct perf_sample sample;
+ perf_sample__init(&sample, /*all=*/true);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1598,6 +1606,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
"CS ETM Trace: failed to deliver instruction event, error %d\n",
ret);
+ perf_sample__exit(&sample);
return ret;
}
@@ -3151,9 +3160,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
+ perf_sample__init(&sample, /*all=*/false);
ret = evsel__parse_sample(evsel, event, &sample);
if (ret)
- return ret;
+ goto out;
/*
* Loop through the auxtrace index to find the buffer that matches up with this aux event.
@@ -3168,7 +3178,7 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
* 1 ('not found')
*/
if (ret != 1)
- return ret;
+ goto out;
}
}
@@ -3178,7 +3188,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
*/
pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
" tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
- return 0;
+ ret = 0;
+out:
+ perf_sample__exit(&sample);
+ return ret;
}
static int cs_etm__queue_aux_records(struct perf_session *session)
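
The cs-etm hunks above converge on one pattern: a perf_sample that may acquire resources during parsing is bracketed by perf_sample__init() and perf_sample__exit(). A condensed sketch, assuming an evsel and event are in scope:

	struct perf_sample sample;
	int err;

	perf_sample__init(&sample, /*all=*/false);
	err = evsel__parse_sample(evsel, event, &sample);
	if (!err) {
		/* ... use sample fields ... */
	}
	perf_sample__exit(&sample);	/* release anything parsing attached */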
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 98661ede2a73..164eb45a0b36 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -158,26 +158,6 @@ out_err:
return ret;
}
-int perf_data__update_dir(struct perf_data *data)
-{
- int i;
-
- if (WARN_ON(!data->is_dir))
- return -EINVAL;
-
- for (i = 0; i < data->dir.nr; i++) {
- struct perf_data_file *file = &data->dir.files[i];
- struct stat st;
-
- if (fstat(file->fd, &st))
- return -1;
-
- file->size = st.st_size;
- }
-
- return 0;
-}
-
static bool check_pipe(struct perf_data *data)
{
struct stat st;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 110f3ebde30f..1438e32e0451 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -97,7 +97,6 @@ int perf_data__switch(struct perf_data *data,
int perf_data__create_dir(struct perf_data *data, int nr);
int perf_data__open_dir(struct perf_data *data);
void perf_data__close_dir(struct perf_data *data);
-int perf_data__update_dir(struct perf_data *data);
unsigned long perf_data__size(struct perf_data *data);
int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
bool has_kcore_dir(const char *path);
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 995f6bb05b5f..f9ef7d045c92 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,8 +46,8 @@ int debug_type_profile;
FILE *debug_file(void)
{
if (!_debug_file) {
- pr_warning_once("debug_file not set");
debug_set_file(stderr);
+ pr_warning_once("debug_file not set");
}
return _debug_file;
}
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
index 19acf4775d35..b5deea7cbdf2 100644
--- a/tools/perf/util/debuginfo.c
+++ b/tools/perf/util/debuginfo.c
@@ -125,8 +125,12 @@ struct debuginfo *debuginfo__new(const char *path)
dso__put(dso);
out:
+ if (dinfo)
+ return dinfo;
+
/* if failed to open all distro debuginfo, open given binary */
- return dinfo ? : __debuginfo__new(path);
+ symbol__join_symfs(buf, path);
+ return __debuginfo__new(buf);
}
void debuginfo__delete(struct debuginfo *dbg)
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 50c5c206b70e..8f0eb56c6fc6 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -48,7 +48,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
-static int disasm_line__parse_powerpc(struct disasm_line *dl);
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args);
static char *expand_tabs(char *line, char **storage, size_t *storage_len);
static __attribute__((constructor)) void symbol__init_regexpr(void)
@@ -968,24 +968,25 @@ out:
#define PPC_OP(op) (((op) >> 26) & 0x3F)
#define RAW_BYTES 11
-static int disasm_line__parse_powerpc(struct disasm_line *dl)
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args)
{
char *line = dl->al.line;
const char **namep = &dl->ins.name;
char **rawp = &dl->ops.raw;
char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
char *name = skip_spaces(name_raw_insn + RAW_BYTES);
- int objdump = 0;
+ int disasm = 0;
+ int ret = 0;
- if (strlen(line) > RAW_BYTES)
- objdump = 1;
+ if (args->options->disassembler_used)
+ disasm = 1;
if (name_raw_insn[0] == '\0')
return -1;
- if (objdump) {
- disasm_line__parse(name, namep, rawp);
- } else
+ if (disasm)
+ ret = disasm_line__parse(name, namep, rawp);
+ else
*namep = "";
tmp_raw_insn = strndup(name_raw_insn, 11);
@@ -995,10 +996,10 @@ static int disasm_line__parse_powerpc(struct disasm_line *dl)
remove_spaces(tmp_raw_insn);
sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
- if (objdump)
+ if (disasm)
dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
- return 0;
+ return ret;
}
static void annotation_line__init(struct annotation_line *al,
@@ -1054,7 +1055,7 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
if (args->offset != -1) {
if (arch__is(args->arch, "powerpc")) {
- if (disasm_line__parse_powerpc(dl) < 0)
+ if (disasm_line__parse_powerpc(dl, args) < 0)
goto out_free_line;
} else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
goto out_free_line;
@@ -2289,16 +2290,20 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
switch (dis) {
case PERF_DISASM_LLVM:
+ args->options->disassembler_used = PERF_DISASM_LLVM;
err = symbol__disassemble_llvm(symfs_filename, sym, args);
break;
case PERF_DISASM_CAPSTONE:
+ args->options->disassembler_used = PERF_DISASM_CAPSTONE;
err = symbol__disassemble_capstone(symfs_filename, sym, args);
break;
case PERF_DISASM_OBJDUMP:
+ args->options->disassembler_used = PERF_DISASM_OBJDUMP;
err = symbol__disassemble_objdump(symfs_filename, sym, args);
break;
case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
default:
+ args->options->disassembler_used = PERF_DISASM_UNKNOWN;
goto out_remove_tmp;
}
if (err == 0)
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5c6e85fdae0d..8619b6eea62d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -67,6 +67,7 @@ char dso__symtab_origin(const struct dso *dso)
[DSO_BINARY_TYPE__GUEST_KMODULE] = 'G',
[DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M',
[DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V',
+ [DSO_BINARY_TYPE__GNU_DEBUGDATA] = 'n',
};
if (dso == NULL || dso__symtab_type(dso) == DSO_BINARY_TYPE__NOT_FOUND)
@@ -93,6 +94,7 @@ bool dso__is_object_file(const struct dso *dso)
case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
case DSO_BINARY_TYPE__GUEST_KMODULE:
case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
@@ -224,6 +226,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__VMLINUX:
case DSO_BINARY_TYPE__GUEST_VMLINUX:
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
__symbol__join_symfs(filename, size, dso__long_name(dso));
break;
@@ -490,11 +493,25 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
/*
* Global list of open DSOs and the counter.
*/
+struct mutex _dso__data_open_lock;
static LIST_HEAD(dso__data_open);
-static long dso__data_open_cnt;
-static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock);
-static void dso__list_add(struct dso *dso)
+static void dso__data_open_lock_init(void)
+{
+ mutex_init(&_dso__data_open_lock);
+}
+
+static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock)
+{
+ static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&data_open_lock_once, dso__data_open_lock_init);
+
+ return &_dso__data_open_lock;
+}
+
+static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
list_add_tail(&dso__data(dso)->open_entry, &dso__data_open);
#ifdef REFCNT_CHECKING
@@ -505,11 +522,13 @@ static void dso__list_add(struct dso *dso)
dso__data_open_cnt++;
}
-static void dso__list_del(struct dso *dso)
+static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
list_del_init(&dso__data(dso)->open_entry);
#ifdef REFCNT_CHECKING
+ mutex_unlock(dso__data_open_lock());
dso__put(dso__data(dso)->dso);
+ mutex_lock(dso__data_open_lock());
#endif
WARN_ONCE(dso__data_open_cnt <= 0,
"DSO data fd counter out of bounds.");
@@ -518,7 +537,7 @@ static void dso__list_del(struct dso *dso)
static void close_first_dso(void);
-static int do_open(char *name)
+static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd;
char sbuf[STRERR_BUFSIZE];
@@ -545,6 +564,7 @@ char *dso__filename_with_chroot(const struct dso *dso, const char *filename)
}
static int __open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd = -EINVAL;
char *root_dir = (char *)"";
@@ -610,6 +630,7 @@ static void check_data_close(void);
* list/count of open DSO objects.
*/
static int open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd;
struct nscookie nsc;
@@ -635,7 +656,7 @@ static int open_dso(struct dso *dso, struct machine *machine)
return fd;
}
-static void close_data_fd(struct dso *dso)
+static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
if (dso__data(dso)->fd >= 0) {
close(dso__data(dso)->fd);
@@ -652,12 +673,12 @@ static void close_data_fd(struct dso *dso)
* Close @dso's data file descriptor and updates
* list/count of open DSO objects.
*/
-static void close_dso(struct dso *dso)
+static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
close_data_fd(dso);
}
-static void close_first_dso(void)
+static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
struct dso_data *dso_data;
struct dso *dso;
@@ -702,7 +723,7 @@ void reset_fd_limit(void)
fd_limit = 0;
}
-static bool may_cache_fd(void)
+static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
if (!fd_limit)
fd_limit = get_fd_limit();
@@ -718,7 +739,7 @@ static bool may_cache_fd(void)
* for opened dso file descriptors. The limit is half
* of the RLIMIT_NOFILE files opened.
*/
-static void check_data_close(void)
+static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
bool cache_fd = may_cache_fd();
@@ -734,12 +755,13 @@ static void check_data_close(void)
*/
void dso__data_close(struct dso *dso)
{
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
close_dso(dso);
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
}
static void try_to_open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
enum dso_binary_type binary_type_data[] = {
DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -781,25 +803,27 @@ out:
* returns file descriptor. It should be paired with
* dso__data_put_fd() if it returns non-negative value.
*/
-int dso__data_get_fd(struct dso *dso, struct machine *machine)
+bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
{
+ *fd = -1;
if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR)
- return -1;
+ return false;
- if (pthread_mutex_lock(&dso__data_open_lock) < 0)
- return -1;
+ mutex_lock(dso__data_open_lock());
try_to_open_dso(dso, machine);
- if (dso__data(dso)->fd < 0)
- pthread_mutex_unlock(&dso__data_open_lock);
+ *fd = dso__data(dso)->fd;
+ if (*fd >= 0)
+ return true;
- return dso__data(dso)->fd;
+ mutex_unlock(dso__data_open_lock());
+ return false;
}
void dso__data_put_fd(struct dso *dso __maybe_unused)
{
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
}
bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
@@ -951,7 +975,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
{
ssize_t ret;
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
/*
* dso__data(dso)->fd might be closed if other thread opened another
@@ -967,7 +991,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset);
out:
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
return ret;
}
@@ -1075,7 +1099,7 @@ static int file_size(struct dso *dso, struct machine *machine)
struct stat st;
char sbuf[STRERR_BUFSIZE];
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
/*
* dso__data(dso)->fd might be closed if other thread opened another
@@ -1099,7 +1123,7 @@ static int file_size(struct dso *dso, struct machine *machine)
dso__data(dso)->file_size = st.st_size;
out:
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
return ret;
}
@@ -1170,6 +1194,68 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
return data_read_write_offset(dso, machine, offset, data, size, true);
}
+uint16_t dso__e_machine(struct dso *dso, struct machine *machine)
+{
+ uint16_t e_machine = EM_NONE;
+ int fd;
+
+ switch (dso__binary_type(dso)) {
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ return EM_HOST;
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ break;
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return EM_NONE;
+ }
+
+ mutex_lock(dso__data_open_lock());
+
+ /*
+ * dso__data(dso)->fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+ fd = dso__data(dso)->fd;
+ if (fd >= 0) {
+ _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset");
+ _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
+ if (dso__needs_swap(dso) == DSO_SWAP__UNSET) {
+ unsigned char eidata;
+
+ if (pread(fd, &eidata, sizeof(eidata), EI_DATA) == sizeof(eidata))
+ dso__swap_init(dso, eidata);
+ }
+ if (dso__needs_swap(dso) != DSO_SWAP__UNSET &&
+ pread(fd, &e_machine, sizeof(e_machine), 18) == sizeof(e_machine))
+ e_machine = DSO__SWAP(dso, uint16_t, e_machine);
+ }
+ mutex_unlock(dso__data_open_lock());
+ return e_machine;
+}
+
/**
* dso__data_read_addr - Read data from dso address
* @dso: dso object
@@ -1525,6 +1611,33 @@ void dso__put(struct dso *dso)
RC_CHK_PUT(dso);
}
+int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+ static unsigned int const endian = 1;
+
+ dso__set_needs_swap(dso, DSO_SWAP__NO);
+
+ switch (eidata) {
+ case ELFDATA2LSB:
+ /* We are big endian, DSO is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ dso__set_needs_swap(dso, DSO_SWAP__YES);
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, DSO is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ dso__set_needs_swap(dso, DSO_SWAP__YES);
+ break;
+
+ default:
+ pr_err("unrecognized DSO data encoding %d\n", eidata);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void dso__set_build_id(struct dso *dso, struct build_id *bid)
{
RC_CHK_ACCESS(dso)->bid = *bid;
@@ -1608,11 +1721,10 @@ size_t dso__fprintf(struct dso *dso, FILE *fp)
enum dso_type dso__type(struct dso *dso, struct machine *machine)
{
- int fd;
+ int fd = -1;
enum dso_type type = DSO__TYPE_UNKNOWN;
- fd = dso__data_get_fd(dso, machine);
- if (fd >= 0) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
type = dso__type_fd(fd);
dso__data_put_fd(dso);
}
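
A hypothetical use of the new dso__e_machine() helper; the check and message below are illustrative only, not part of this patch:

	uint16_t em = dso__e_machine(dso, machine);

	if (em != EM_HOST && em != EM_NONE)
		pr_debug("%s: cross-architecture DSO (e_machine %u)\n",
			 dso__long_name(dso), em);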
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index bb8e8f444054..c87564471f9b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -20,30 +20,88 @@ struct perf_env;
#define DSO__NAME_KALLSYMS "[kernel.kallsyms]"
#define DSO__NAME_KCORE "[kernel.kcore]"
+/**
+ * enum dso_binary_type - The kind of DSO generally associated with a memory
+ * region (struct map).
+ */
enum dso_binary_type {
+ /** @DSO_BINARY_TYPE__KALLSYMS: Symbols from /proc/kallsyms file. */
DSO_BINARY_TYPE__KALLSYMS = 0,
+ /** @DSO_BINARY_TYPE__GUEST_KALLSYMS: Guest /proc/kallsyms file. */
DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ /** @DSO_BINARY_TYPE__VMLINUX: Path to kernel /boot/vmlinux file. */
DSO_BINARY_TYPE__VMLINUX,
+ /** @DSO_BINARY_TYPE__GUEST_VMLINUX: Path to guest kernel /boot/vmlinux file. */
DSO_BINARY_TYPE__GUEST_VMLINUX,
+ /** @DSO_BINARY_TYPE__JAVA_JIT: Symbols from /tmp/perf.map file. */
DSO_BINARY_TYPE__JAVA_JIT,
+ /**
+ * @DSO_BINARY_TYPE__DEBUGLINK: Debug file readable from the file path
+ * in the .gnu_debuglink ELF section of the dso.
+ */
DSO_BINARY_TYPE__DEBUGLINK,
+ /**
+ * @DSO_BINARY_TYPE__BUILD_ID_CACHE: File named after buildid located in
+ * the buildid cache with an elf filename.
+ */
DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ /**
+ * @DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: File named after buildid
+ * located in the buildid cache with a debug filename.
+ */
DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__FEDORA_DEBUGINFO: Debug file in /usr/lib/debug
+ * with .debug suffix.
+ */
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ /** @DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: Debug file in /usr/lib/debug. */
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	/**
+	 * @DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: A debuginfo file whose
+	 * dso__long_name is under /usr/lib/debug/lib rather than the expected
+	 * /usr/lib/debug/usr/lib.
+	 */
DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__BUILDID_DEBUGINFO: File named after buildid located
+ * in /usr/lib/debug/.build-id/.
+ */
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__GNU_DEBUGDATA: MiniDebuginfo where a compressed
+ * ELF file is placed in a .gnu_debugdata section.
+ */
+ DSO_BINARY_TYPE__GNU_DEBUGDATA,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_DSO: A regular executable/shared-object file. */
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ /** @DSO_BINARY_TYPE__GUEST_KMODULE: Guest kernel module .ko file. */
DSO_BINARY_TYPE__GUEST_KMODULE,
+ /** @DSO_BINARY_TYPE__GUEST_KMODULE_COMP: Guest kernel module .ko.gz file. */
DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: Kernel module .ko file. */
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: Kernel module .ko.gz file. */
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ /** @DSO_BINARY_TYPE__KCORE: /proc/kcore file. */
DSO_BINARY_TYPE__KCORE,
+ /** @DSO_BINARY_TYPE__GUEST_KCORE: Guest /proc/kcore file. */
DSO_BINARY_TYPE__GUEST_KCORE,
+ /**
+ * @DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: Openembedded/Yocto -dbg
+ * package debug info.
+ */
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ /** @DSO_BINARY_TYPE__BPF_PROG_INFO: jitted BPF code. */
DSO_BINARY_TYPE__BPF_PROG_INFO,
+ /** @DSO_BINARY_TYPE__BPF_IMAGE: jitted BPF trampoline or dispatcher code. */
DSO_BINARY_TYPE__BPF_IMAGE,
+	/**
+	 * @DSO_BINARY_TYPE__OOL: Out-of-line code such as kprobe-replaced
+	 * instructions, optimized kprobes, or ftrace trampolines.
+	 */
DSO_BINARY_TYPE__OOL,
+ /** @DSO_BINARY_TYPE__NOT_FOUND: Unknown DSO kind. */
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -154,10 +212,12 @@ struct dso_data {
int status;
u32 status_seen;
u64 file_size;
+#ifdef HAVE_LIBUNWIND_SUPPORT
u64 elf_base_addr;
u64 debug_frame_offset;
u64 eh_frame_hdr_addr;
u64 eh_frame_hdr_offset;
+#endif
};
struct dso_bpf_prog {
@@ -231,6 +291,8 @@ DECLARE_RC_STRUCT(dso) {
char name[];
};
+extern struct mutex _dso__data_open_lock;
+
/* dso__for_each_symbol - iterate over the symbols of given type
*
* @dso: the 'struct dso *' in which symbols are iterated
@@ -652,7 +714,7 @@ void __dso__inject_id(struct dso *dso, const struct dso_id *id);
int dso__name_len(const struct dso *dso);
struct dso *dso__get(struct dso *dso);
-void dso__put(struct dso *dso);
+void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
static inline void __dso__zput(struct dso **dso)
{
@@ -675,6 +737,8 @@ bool dso__sorted_by_name(const struct dso *dso);
void dso__set_sorted_by_name(struct dso *dso);
void dso__sort_by_name(struct dso *dso);
+int dso__swap_init(struct dso *dso, unsigned char eidata);
+
void dso__set_build_id(struct dso *dso, struct build_id *bid);
bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
void dso__read_running_kernel_build_id(struct dso *dso,
@@ -732,8 +796,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
* The current usage of the dso__data_* interface is as follows:
*
* Get DSO's fd:
- * int fd = dso__data_get_fd(dso, machine);
- * if (fd >= 0) {
+ * int fd;
+ * if (dso__data_get_fd(dso, machine, &fd)) {
* USE 'fd' SOMEHOW
* dso__data_put_fd(dso);
* }
@@ -755,14 +819,16 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
*
* TODO
*/
-int dso__data_get_fd(struct dso *dso, struct machine *machine);
-void dso__data_put_fd(struct dso *dso);
-void dso__data_close(struct dso *dso);
+bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
+ EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock);
+void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock);
+void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
int dso__data_file_size(struct dso *dso, struct machine *machine);
off_t dso__data_size(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size);
+uint16_t dso__e_machine(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
struct machine *machine, u64 addr,
u8 *data, ssize_t size);
@@ -808,7 +874,9 @@ static inline bool dso__is_kcore(const struct dso *dso)
static inline bool dso__is_kallsyms(const struct dso *dso)
{
- return RC_CHK_ACCESS(dso)->kernel && RC_CHK_ACCESS(dso)->long_name[0] != '/';
+ enum dso_binary_type bt = dso__binary_type(dso);
+
+ return bt == DSO_BINARY_TYPE__KALLSYMS || bt == DSO_BINARY_TYPE__GUEST_KALLSYMS;
}
bool dso__is_object_file(const struct dso *dso);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index cae4f6d63318..36411749e007 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+ nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
}
nr++;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index aac96d5d1917..c23b77f8f854 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -767,6 +767,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->socket = env->cpu[al->cpu].socket_id;
}
+ /* Account for possible out-of-order switch events. */
+ al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
+ if (test_bit(al->parallelism, symbol_conf.parallelism_filter))
+ al->filtered |= (1 << HIST_FILTER__PARALLELISM);
+	/*
+	 * Multiply by a constant to avoid precision loss and having to deal
+	 * with floats. The multiplier otherwise does not matter since we
+	 * only print the value as a percentage.
+	 */
+ al->latency = sample->period * 1000 / al->parallelism;
+
if (al->map) {
if (symbol_conf.dso_list &&
(!dso || !(strlist__has_entry(symbol_conf.dso_list,
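
A worked example of the latency scaling above, using the formula as added: a sample with period 4000 observed at parallelism 8 contributes

	latency = 4000 * 1000 / 8 = 500000

versus 4000 * 1000 / 1 = 4000000 for the same sample at parallelism 1; the constant factor of 1000 cancels out once latency is reported as a percentage of hists__total_latency().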
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2744c54f404e..664bf39567ce 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -67,9 +67,15 @@ enum {
PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15,
+ PERF_IP_FLAG_NOT_TAKEN = 1ULL << 16,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn"
+
+#define PERF_ADDITIONAL_STATE_MASK \
+ (PERF_IP_FLAG_IN_TX | \
+ PERF_IP_FLAG_INTR_DISABLE | \
+ PERF_IP_FLAG_INTR_TOGGLE)
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
@@ -85,6 +91,10 @@ enum {
PERF_IP_FLAG_VMENTRY |\
PERF_IP_FLAG_VMEXIT)
+#define PERF_IP_FLAG_BRANCH_EVENT_MASK \
+ (PERF_IP_FLAG_BRANCH_MISS | \
+ PERF_IP_FLAG_NOT_TAKEN)
+
#define PERF_MEM_DATA_SRC_NONE \
(PERF_MEM_S(OP, NA) |\
PERF_MEM_S(LVL, NA) |\
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index eabd7913c309..dcff697ed252 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -57,6 +57,8 @@ struct events_stats {
struct hists_stats {
u64 total_period;
u64 total_non_filtered_period;
+ u64 total_latency;
+ u64 total_non_filtered_latency;
u32 nr_samples;
u32 nr_non_filtered_samples;
u32 nr_lost_samples;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f0dd174e2deb..c1a04141aed0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1373,19 +1373,18 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
*/
cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
- goto out;
+ return -ENOMEM;
threads = perf_thread_map__new_dummy();
- if (!threads)
- goto out_put;
+ if (!threads) {
+ perf_cpu_map__put(cpus);
+ return -ENOMEM;
+ }
perf_evlist__set_maps(&evlist->core, cpus, threads);
-
perf_thread_map__put(threads);
-out_put:
perf_cpu_map__put(cpus);
-out:
- return -ENOMEM;
+ return 0;
}
int evlist__open(struct evlist *evlist)
@@ -2535,10 +2534,10 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
return;
evlist__for_each_entry(evlist, pos) {
- struct perf_cpu_map *intersect, *to_test;
+ struct perf_cpu_map *intersect, *to_test, *online = cpu_map__online();
const struct perf_pmu *pmu = evsel__find_pmu(pos);
- to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online();
+ to_test = pmu && pmu->is_core ? pmu->cpus : online;
intersect = perf_cpu_map__intersect(to_test, user_requested_cpus);
if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
char buf[128];
@@ -2548,6 +2547,7 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos));
}
perf_cpu_map__put(intersect);
+ perf_cpu_map__put(online);
}
perf_cpu_map__put(user_requested_cpus);
}
@@ -2594,3 +2594,17 @@ bool evlist__has_bpf_output(struct evlist *evlist)
return false;
}
+
+bool evlist__needs_bpf_sb_event(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_dummy_event(evsel))
+ continue;
+ if (!evsel->core.attr.exclude_kernel)
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index adddb1db1ad2..edcbf1c10e92 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -435,5 +435,6 @@ void evlist__check_mem_load_aux(struct evlist *evlist);
void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
void evlist__uniquify_name(struct evlist *evlist);
bool evlist__has_bpf_output(struct evlist *evlist);
+bool evlist__needs_bpf_sb_event(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bc144388f892..1974395492d7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -237,6 +237,16 @@ set_methods:
return 0;
}
+const char *evsel__pmu_name(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ if (pmu)
+ return pmu->name;
+
+ return event_type(evsel->core.attr.type);
+}
+
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
int __evsel__sample_size(u64 sample_type)
@@ -511,6 +521,16 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
}
evsel->cgrp = cgroup__get(orig->cgrp);
#ifdef HAVE_LIBTRACEEVENT
+ if (orig->tp_sys) {
+ evsel->tp_sys = strdup(orig->tp_sys);
+ if (evsel->tp_sys == NULL)
+ goto out_err;
+ }
+ if (orig->tp_name) {
+ evsel->tp_name = strdup(orig->tp_name);
+ if (evsel->tp_name == NULL)
+ goto out_err;
+ }
evsel->tp_format = orig->tp_format;
#endif
evsel->handler = orig->handler;
@@ -634,7 +654,11 @@ struct tep_event *evsel__tp_format(struct evsel *evsel)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
return NULL;
- tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
+ if (!evsel->tp_sys)
+ tp_format = trace_event__tp_format_id(evsel->core.attr.config);
+ else
+ tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
+
if (IS_ERR(tp_format)) {
int err = -PTR_ERR(evsel->tp_format);
@@ -3164,17 +3188,19 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
if (type & PERF_SAMPLE_REGS_USER) {
+ struct regs_dump *regs = perf_sample__user_regs(data);
+
OVERFLOW_CHECK_u64(array);
- data->user_regs.abi = *array;
+ regs->abi = *array;
array++;
- if (data->user_regs.abi) {
+ if (regs->abi) {
u64 mask = evsel->core.attr.sample_regs_user;
sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
- data->user_regs.mask = mask;
- data->user_regs.regs = (u64 *)array;
+ regs->mask = mask;
+ regs->regs = (u64 *)array;
array = (void *)array + sz;
}
}
@@ -3218,19 +3244,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array++;
}
- data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
if (type & PERF_SAMPLE_REGS_INTR) {
+ struct regs_dump *regs = perf_sample__intr_regs(data);
+
OVERFLOW_CHECK_u64(array);
- data->intr_regs.abi = *array;
+ regs->abi = *array;
array++;
- if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) {
u64 mask = evsel->core.attr.sample_regs_intr;
sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
- data->intr_regs.mask = mask;
- data->intr_regs.regs = (u64 *)array;
+ regs->mask = mask;
+ regs->regs = (u64 *)array;
array = (void *)array + sz;
}
}
@@ -3856,10 +3883,10 @@ void evsel__zero_per_pkg(struct evsel *evsel)
*/
bool evsel__is_hybrid(const struct evsel *evsel)
{
- if (perf_pmus__num_core_pmus() == 1)
+ if (!evsel->core.is_pmu_core)
return false;
- return evsel->core.is_pmu_core;
+ return perf_pmus__num_core_pmus() > 1;
}
struct evsel *evsel__leader(const struct evsel *evsel)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5e789fa80590..aae431d63d64 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -119,6 +119,7 @@ struct evsel {
bool errored;
bool needs_auxtrace_mmap;
bool default_metricgroup; /* A member of the Default metricgroup */
+ bool needs_uniquify;
struct hashmap *per_pkg_mask;
int err;
int script_output_type;
@@ -236,6 +237,7 @@ int evsel__object_config(size_t object_size,
void (*fini)(struct evsel *evsel));
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel);
+const char *evsel__pmu_name(const struct evsel *evsel);
bool evsel__is_aux_event(const struct evsel *evsel);
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index c221dcce6666..6413537442aa 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -215,6 +215,8 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
struct expr_id_data **data)
{
+ if (!ctx || !id)
+ return -1;
return hashmap__find(ctx->ids, id, data) ? 0 : -1;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 5dee2caba0fe..a9bc47da83a5 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -24,6 +24,8 @@ struct perf_ftrace {
unsigned int bucket_range;
unsigned int min_latency;
unsigned int max_latency;
+ unsigned int bucket_num;
+ bool hide_empty;
int graph_depth;
int func_stack_trace;
int func_irq_info;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d06aa86352d3..e3cdc3b7b4ab 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -44,6 +44,7 @@
#include "build-id.h"
#include "data.h"
#include <api/fs/fs.h>
+#include <api/io_dir.h>
#include "asm/bug.h"
#include "tool.h"
#include "time-utils.h"
@@ -1311,11 +1312,11 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
{
unsigned int phys, size = 0;
char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
#define for_each_memory(mem, dir) \
- while ((ent = readdir(dir))) \
+ while ((ent = io_dir__readdir(&dir)) != NULL) \
if (strcmp(ent->d_name, ".") && \
strcmp(ent->d_name, "..") && \
sscanf(ent->d_name, "memory%u", &mem) == 1)
@@ -1324,9 +1325,9 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
"%s/devices/system/node/node%lu",
sysfs__mountpoint(), idx);
- dir = opendir(path);
- if (!dir) {
- pr_warning("failed: can't open memory sysfs data\n");
+ io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (dir.dirfd < 0) {
+ pr_warning("failed: can't open memory sysfs data '%s'\n", path);
return -1;
}
@@ -1338,20 +1339,20 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
n->set = bitmap_zalloc(size);
if (!n->set) {
- closedir(dir);
+ close(dir.dirfd);
return -ENOMEM;
}
n->node = idx;
n->size = size;
- rewinddir(dir);
+ io_dir__rewinddir(&dir);
for_each_memory(phys, dir) {
__set_bit(phys, n->set);
}
- closedir(dir);
+ close(dir.dirfd);
return 0;
}
@@ -1374,8 +1375,8 @@ static int memory_node__sort(const void *a, const void *b)
static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
{
char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
int ret = 0;
size_t cnt = 0, size = 0;
struct memory_node *nodes = NULL;
@@ -1383,14 +1384,14 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
scnprintf(path, PATH_MAX, "%s/devices/system/node/",
sysfs__mountpoint());
- dir = opendir(path);
- if (!dir) {
+ io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (dir.dirfd < 0) {
pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
__func__, path);
return -1;
}
- while (!ret && (ent = readdir(dir))) {
+ while (!ret && (ent = io_dir__readdir(&dir))) {
unsigned int idx;
int r;
@@ -1419,7 +1420,7 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
cnt += 1;
}
out:
- closedir(dir);
+ close(dir.dirfd);
if (!ret) {
*cntp = cnt;
*nodesp = nodes;
@@ -2769,6 +2770,8 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
free(name);
pmu_num--;
}
+	/* AMD may have set it via evlist__has_amd_ibs() from perf_session__new() */
+ free(ff->ph->env.pmu_mappings);
ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
return 0;
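
The opendir() to io_dir conversions above share one shape; a condensed sketch of the pattern:

	struct io_dir dir;
	struct io_dirent64 *ent;

	io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
	if (dir.dirfd < 0)
		return -1;

	while ((ent = io_dir__readdir(&dir)) != NULL) {
		/* skip "." and "..", then handle ent->d_name */
	}
	close(dir.dirfd);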
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0f30f843c566..d65228c11412 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -43,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
struct hist_entry *he);
static bool hists__filter_entry_by_socket(struct hists *hists,
struct hist_entry *he);
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+ struct hist_entry *he);
u16 hists__col_len(struct hists *hists, enum hist_column col)
{
@@ -207,6 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_CGROUP, 6);
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+ hists__new_col_len(hists, HISTC_PARALLELISM, 11);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -302,9 +305,10 @@ static long hist_time(unsigned long htime)
return htime;
}
-static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency)
{
he_stat->period += period;
+ he_stat->latency += latency;
he_stat->nr_events += 1;
}
@@ -319,6 +323,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->weight2 += src->weight2;
dest->weight3 += src->weight3;
dest->nr_events += src->nr_events;
+ dest->latency += src->latency;
}
static void he_stat__decay(struct he_stat *he_stat)
@@ -328,6 +333,7 @@ static void he_stat__decay(struct he_stat *he_stat)
he_stat->weight1 = (he_stat->weight1 * 7) / 8;
he_stat->weight2 = (he_stat->weight2 * 7) / 8;
he_stat->weight3 = (he_stat->weight3 * 7) / 8;
+ he_stat->latency = (he_stat->latency * 7) / 8;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
@@ -335,7 +341,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
{
u64 prev_period = he->stat.period;
- u64 diff;
+ u64 prev_latency = he->stat.latency;
if (prev_period == 0)
return true;
@@ -345,12 +351,16 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
he_stat__decay(he->stat_acc);
decay_callchain(he->callchain);
- diff = prev_period - he->stat.period;
-
if (!he->depth) {
- hists->stats.total_period -= diff;
- if (!he->filtered)
- hists->stats.total_non_filtered_period -= diff;
+ u64 period_diff = prev_period - he->stat.period;
+ u64 latency_diff = prev_latency - he->stat.latency;
+
+ hists->stats.total_period -= period_diff;
+ hists->stats.total_latency -= latency_diff;
+ if (!he->filtered) {
+ hists->stats.total_non_filtered_period -= period_diff;
+ hists->stats.total_non_filtered_latency -= latency_diff;
+ }
}
if (!he->leaf) {
@@ -365,7 +375,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
}
}
- return he->stat.period == 0;
+ return he->stat.period == 0 && he->stat.latency == 0;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
@@ -584,21 +594,24 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
return he;
}
-static u8 symbol__parent_filter(const struct symbol *parent)
+static filter_mask_t symbol__parent_filter(const struct symbol *parent)
{
if (symbol_conf.exclude_other && parent == NULL)
return 1 << HIST_FILTER__PARENT;
return 0;
}
-static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency)
{
if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
return;
he->hists->callchain_period += period;
- if (!he->filtered)
+ he->hists->callchain_latency += latency;
+ if (!he->filtered) {
he->hists->callchain_non_filtered_period += period;
+ he->hists->callchain_non_filtered_latency += latency;
+ }
}
static struct hist_entry *hists__findnew_entry(struct hists *hists,
@@ -611,6 +624,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
struct hist_entry *he;
int64_t cmp;
u64 period = entry->stat.period;
+ u64 latency = entry->stat.latency;
bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node;
@@ -629,10 +643,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (!cmp) {
if (sample_self) {
he_stat__add_stat(&he->stat, &entry->stat);
- hist_entry__add_callchain_period(he, period);
+ hist_entry__add_callchain_period(he, period, latency);
}
if (symbol_conf.cumulate_callchain)
- he_stat__add_period(he->stat_acc, period);
+ he_stat__add_period(he->stat_acc, period, latency);
block_info__delete(entry->block_info);
@@ -669,7 +683,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
return NULL;
if (sample_self)
- hist_entry__add_callchain_period(he, period);
+ hist_entry__add_callchain_period(he, period, latency);
hists->nr_entries++;
rb_link_node(&he->rb_node_in, parent, p);
@@ -741,12 +755,14 @@ __hists__add_entry(struct hists *hists,
.ip = al->addr,
.level = al->level,
.code_page_size = sample->code_page_size,
+ .parallelism = al->parallelism,
.stat = {
.nr_events = 1,
.period = sample->period,
.weight1 = sample->weight,
.weight2 = sample->ins_lat,
.weight3 = sample->p_stage_cyc,
+ .latency = al->latency,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
@@ -975,8 +991,6 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
if (he == NULL)
return -ENOMEM;
- hists__inc_nr_samples(hists, he->filtered);
-
out:
iter->he = he;
iter->curr++;
@@ -995,9 +1009,15 @@ static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
+ struct evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+
for (int i = 0; i < iter->total; i++)
branch_info__exit(&iter->bi[i]);
+ if (iter->he)
+ hists__inc_nr_samples(hists, iter->he->filtered);
+
zfree(&iter->bi);
iter->he = NULL;
@@ -1365,6 +1385,16 @@ void hist_entry__delete(struct hist_entry *he)
{
struct hist_entry_ops *ops = he->ops;
+ if (symbol_conf.report_hierarchy) {
+ struct rb_root *root = &he->hroot_out.rb_root;
+ struct hist_entry *child, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node)
+ hist_entry__delete(child);
+
+ *root = RB_ROOT;
+ }
+
thread__zput(he->thread);
map_symbol__exit(&he->ms);
@@ -1455,6 +1485,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
if (symbol_conf.sym_list == NULL)
return;
break;
+ case HIST_FILTER__PARALLELISM:
+ if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0)
+ return;
+ break;
case HIST_FILTER__PARENT:
case HIST_FILTER__GUEST:
case HIST_FILTER__HOST:
@@ -1513,6 +1547,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
perf_hpp__is_sym_entry);
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM,
+ perf_hpp__is_parallelism_entry);
+
hists__apply_filters(he->hists, he);
}
@@ -1709,6 +1746,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
hists__filter_entry_by_thread(hists, he);
hists__filter_entry_by_symbol(hists, he);
hists__filter_entry_by_socket(hists, he);
+ hists__filter_entry_by_parallelism(hists, he);
}
int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
@@ -1756,12 +1794,14 @@ static void hists__reset_filter_stats(struct hists *hists)
{
hists->nr_non_filtered_entries = 0;
hists->stats.total_non_filtered_period = 0;
+ hists->stats.total_non_filtered_latency = 0;
}
void hists__reset_stats(struct hists *hists)
{
hists->nr_entries = 0;
hists->stats.total_period = 0;
+ hists->stats.total_latency = 0;
hists__reset_filter_stats(hists);
}
@@ -1770,6 +1810,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
{
hists->nr_non_filtered_entries++;
hists->stats.total_non_filtered_period += h->stat.period;
+ hists->stats.total_non_filtered_latency += h->stat.latency;
}
void hists__inc_stats(struct hists *hists, struct hist_entry *h)
@@ -1779,6 +1820,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
hists->nr_entries++;
hists->stats.total_period += h->stat.period;
+ hists->stats.total_latency += h->stat.latency;
}
static void hierarchy_recalc_total_periods(struct hists *hists)
@@ -1790,6 +1832,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
hists->stats.total_period = 0;
hists->stats.total_non_filtered_period = 0;
+ hists->stats.total_latency = 0;
+ hists->stats.total_non_filtered_latency = 0;
/*
* recalculate total period using top-level entries only
@@ -1801,8 +1845,11 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
node = rb_next(node);
hists->stats.total_period += he->stat.period;
- if (!he->filtered)
+ hists->stats.total_latency += he->stat.latency;
+ if (!he->filtered) {
hists->stats.total_non_filtered_period += he->stat.period;
+ hists->stats.total_non_filtered_latency += he->stat.latency;
+ }
}
}
@@ -2195,6 +2242,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists,
return false;
}
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (test_bit(he->parallelism, hists->parallelism_filter)) {
+ he->filtered |= (1 << HIST_FILTER__PARALLELISM);
+ return true;
+ }
+ return false;
+}
+
typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
@@ -2364,6 +2421,16 @@ void hists__filter_by_socket(struct hists *hists)
hists__filter_entry_by_socket);
}
+void hists__filter_by_parallelism(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM,
+ hists->parallelism_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__PARALLELISM,
+ hists__filter_entry_by_parallelism);
+}
+
void events_stats__inc(struct events_stats *stats, u32 type)
{
++stats->nr_events[0];
@@ -2759,6 +2826,12 @@ u64 hists__total_period(struct hists *hists)
hists->stats.total_period;
}
+u64 hists__total_latency(struct hists *hists)
+{
+ return symbol_conf.filter_relative ? hists->stats.total_non_filtered_latency :
+ hists->stats.total_latency;
+}
+
int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
{
char unit;
@@ -2870,6 +2943,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
hists->entries = RB_ROOT_CACHED;
mutex_init(&hists->lock);
hists->socket_filter = -1;
+ hists->parallelism_filter = symbol_conf.parallelism_filter;
hists->hpp_list = hpp_list;
INIT_LIST_HEAD(&hists->hpp_formats);
return 0;
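/*
 * A minimal sketch of the latency accounting the hunks above add,
 * assuming only what is visible here: a sample's period is re-scaled
 * by the parallelism observed at sample time, so latency totals
 * approximate wall-clock impact rather than CPU time. Illustrative
 * helper, not code from this patch.
 */
static u64 sample_latency(u64 period, int parallelism)
{
	/* Parallelism can be <1 if events are poorly ordered; clamp it. */
	if (parallelism < 1)
		parallelism = 1;
	return period / parallelism;
}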
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 46c8373e3146..317d06cca8b8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -31,8 +31,11 @@ enum hist_filter {
HIST_FILTER__HOST,
HIST_FILTER__SOCKET,
HIST_FILTER__C2C,
+ HIST_FILTER__PARALLELISM,
};
+typedef u16 filter_mask_t;
+
enum hist_column {
HISTC_SYMBOL,
HISTC_TIME,
@@ -42,6 +45,7 @@ enum hist_column {
HISTC_CGROUP_ID,
HISTC_CGROUP,
HISTC_PARENT,
+ HISTC_PARALLELISM,
HISTC_CPU,
HISTC_SOCKET,
HISTC_SRCLINE,
@@ -105,10 +109,13 @@ struct hists {
u64 nr_non_filtered_entries;
u64 callchain_period;
u64 callchain_non_filtered_period;
+ u64 callchain_latency;
+ u64 callchain_non_filtered_latency;
struct thread *thread_filter;
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
+ unsigned long *parallelism_filter;
struct mutex lock;
struct hists_stats stats;
u64 event_stream;
@@ -165,6 +172,12 @@ struct res_sample {
struct he_stat {
u64 period;
+ /*
+ * Period re-scaled from CPU time to wall-clock time (divided by the
+ * parallelism at the time of the sample). This represents the effect of
+ * the event on latency rather than CPU consumption.
+ */
+ u64 latency;
u64 period_sys;
u64 period_us;
u64 period_guest_sys;
@@ -226,15 +239,16 @@ struct hist_entry {
u64 cgroup;
u64 ip;
u64 transaction;
- s32 socket;
- s32 cpu;
u64 code_page_size;
u64 weight;
u64 ins_lat;
u64 p_stage_cyc;
+ s32 socket;
+ s32 cpu;
+ int parallelism;
+ int mem_type_off;
u8 cpumode;
u8 depth;
- int mem_type_off;
struct simd_flags simd_flags;
/* We are added by hists__add_dummy_entry. */
@@ -242,7 +256,7 @@ struct hist_entry {
bool leaf;
char level;
- u8 filtered;
+ filter_mask_t filtered;
u16 callchain_size;
union {
@@ -368,6 +382,7 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows);
struct hist_entry *hists__get_entry(struct hists *hists, int idx);
u64 hists__total_period(struct hists *hists);
+u64 hists__total_latency(struct hists *hists);
void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
@@ -384,11 +399,13 @@ void hists__filter_by_dso(struct hists *hists);
void hists__filter_by_thread(struct hists *hists);
void hists__filter_by_symbol(struct hists *hists);
void hists__filter_by_socket(struct hists *hists);
+void hists__filter_by_parallelism(struct hists *hists);
static inline bool hists__has_filter(struct hists *hists)
{
return hists->thread_filter || hists->dso_filter ||
- hists->symbol_filter_str || (hists->socket_filter > -1);
+ hists->symbol_filter_str || (hists->socket_filter > -1) ||
+ hists->parallelism_filter;
}
u16 hists__col_len(struct hists *hists, enum hist_column col);
@@ -547,11 +564,13 @@ extern struct perf_hpp_fmt perf_hpp__format[];
enum {
/* Matches perf_hpp__format array. */
PERF_HPP__OVERHEAD,
+ PERF_HPP__LATENCY,
PERF_HPP__OVERHEAD_SYS,
PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US,
PERF_HPP__OVERHEAD_ACC,
+ PERF_HPP__LATENCY_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
PERF_HPP__WEIGHT1,
@@ -563,6 +582,7 @@ enum {
void perf_hpp__init(void);
void perf_hpp__cancel_cumulate(void);
+void perf_hpp__cancel_latency(void);
void perf_hpp__setup_output_field(struct perf_hpp_list *list);
void perf_hpp__reset_output_field(struct perf_hpp_list *list);
void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
@@ -580,6 +600,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
@@ -606,6 +627,7 @@ void hists__reset_column_width(struct hists *hists);
enum perf_hpp_fmt_type {
PERF_HPP_FMT_TYPE__RAW,
PERF_HPP_FMT_TYPE__PERCENT,
+ PERF_HPP_FMT_TYPE__LATENCY,
PERF_HPP_FMT_TYPE__AVERAGE,
};
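/*
 * Why filtered widens to filter_mask_t above: assuming enum hist_filter
 * starts at zero, HIST_FILTER__PARALLELISM is the ninth bit and no
 * longer fits in a u8. A compile-time check in the spirit of the change:
 */
_Static_assert(HIST_FILTER__PARALLELISM < 8 * sizeof(filter_mask_t),
	       "hist_filter bits must fit in filter_mask_t");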
diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
index 4acb9bb19b84..3cce77fc8004 100644
--- a/tools/perf/util/hwmon_pmu.c
+++ b/tools/perf/util/hwmon_pmu.c
@@ -11,13 +11,13 @@
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
-#include <dirent.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -108,20 +108,6 @@ struct hwmon_pmu {
};
/**
- * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key
- * represents an event.
- *
- * Related hwmon files start <type><number> that this key represents.
- */
-union hwmon_pmu_event_key {
- long type_and_num;
- struct {
- int num :16;
- enum hwmon_type type :8;
- };
-};
-
-/**
* struct hwmon_pmu_event_value: Value in hwmon_pmu->events.
*
* Hwmon files are of the form <type><number>_<item> and may have a suffix
@@ -249,31 +235,22 @@ static void fix_name(char *p)
static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
{
- DIR *dir;
- struct dirent *ent;
- int dup_fd, err = 0;
+ int err = 0;
struct hashmap_entry *cur, *tmp;
size_t bkt;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
if (pmu->pmu.sysfs_aliases_loaded)
return 0;
- /*
- * Use a dup-ed fd as closedir will close it. Use openat so that the
- * directory contents are refreshed.
- */
- dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY);
-
- if (dup_fd == -1)
- return -ENOMEM;
+ /* Use openat so that the directory contents are refreshed. */
+ io_dir__init(&dir, openat(pmu->hwmon_dir_fd, ".", O_CLOEXEC | O_DIRECTORY | O_RDONLY));
- dir = fdopendir(dup_fd);
- if (!dir) {
- close(dup_fd);
- return -ENOMEM;
- }
+ if (dir.dirfd < 0)
+ return -ENOENT;
- while ((ent = readdir(dir)) != NULL) {
+ while ((ent = io_dir__readdir(&dir)) != NULL) {
enum hwmon_type type;
int number;
enum hwmon_item item;
@@ -361,7 +338,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
pmu->pmu.sysfs_aliases_loaded = true;
err_out:
- closedir(dir);
+ close(dir.dirfd);
return err;
}
@@ -716,8 +693,8 @@ int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_inf
int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
{
char *line = NULL;
- DIR *class_hwmon_dir;
- struct dirent *class_hwmon_ent;
+ struct io_dirent64 *class_hwmon_ent;
+ struct io_dir class_hwmon_dir;
char buf[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -725,11 +702,12 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
return 0;
scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs);
- class_hwmon_dir = opendir(buf);
- if (!class_hwmon_dir)
+ io_dir__init(&class_hwmon_dir, open(buf, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+
+ if (class_hwmon_dir.dirfd < 0)
return 0;
- while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) {
+ while ((class_hwmon_ent = io_dir__readdir(&class_hwmon_dir)) != NULL) {
size_t line_len;
int hwmon_dir, name_fd;
struct io io;
@@ -759,7 +737,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
close(name_fd);
}
free(line);
- closedir(class_hwmon_dir);
+ close(class_hwmon_dir.dirfd);
return 0;
}
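/*
 * The io_dir idiom the hunks above switch to, in schematic form. It
 * works on a raw fd, so failure shows up as a negative dirfd and
 * cleanup is a plain close(); only the helpers used above are assumed.
 */
static int for_each_dir_entry(const char *path)
{
	struct io_dir dir;
	struct io_dirent64 *ent;

	io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
	if (dir.dirfd < 0)
		return -ENOENT;
	while ((ent = io_dir__readdir(&dir)) != NULL) {
		/* skip "." and ".." as the loops above do */
	}
	close(dir.dirfd);
	return 0;
}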
diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h
index 882566846df4..b3329774d2b2 100644
--- a/tools/perf/util/hwmon_pmu.h
+++ b/tools/perf/util/hwmon_pmu.h
@@ -91,6 +91,22 @@ enum hwmon_item {
HWMON_ITEM__MAX,
};
+/**
+ * union hwmon_pmu_event_key: Key for hwmon_pmu->events; each key
+ * represents an event.
+ * The union is exposed for testing, to ensure layout problems are
+ * avoided on big-endian machines.
+ *
+ * Related hwmon files start with the <type><number> that this key represents.
+ */
+union hwmon_pmu_event_key {
+ long type_and_num;
+ struct {
+ int num :16;
+ enum hwmon_type type :8;
+ };
+};
+
bool perf_pmu__is_hwmon(const struct perf_pmu *pmu);
bool evsel__is_hwmon(const struct evsel *evsel);
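/*
 * A sketch of the round-trip property the exposed union above lets
 * tests check: packing (type, num) and reading the pair back through
 * type_and_num must be stable on both little- and big-endian hosts.
 * Illustrative only.
 */
static inline bool hwmon_pmu_event_key__roundtrip(enum hwmon_type type, int num)
{
	union hwmon_pmu_event_key key = { .type = type, .num = num };
	union hwmon_pmu_event_key copy = { .type_and_num = key.type_and_num };

	return copy.type == type && copy.num == num;
}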
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index a7c589fecb98..3625c6224750 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -275,12 +275,13 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
int ret;
struct intel_bts *bts = btsq->bts;
union perf_event event;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
if (bts->synth_opts.initial_skip &&
bts->num_events++ <= bts->synth_opts.initial_skip)
return 0;
+ perf_sample__init(&sample, /*all=*/true);
sample.ip = le64_to_cpu(branch->from);
sample.cpumode = intel_bts_cpumode(bts, sample.ip);
sample.pid = btsq->pid;
@@ -312,6 +313,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
pr_err("Intel BTS: failed to deliver branch event, error %d\n",
ret);
+ perf_sample__exit(&sample);
return ret;
}
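/*
 * The lifecycle pattern the hunk above (and the intel-pt hunks below)
 * adopt, in schematic form; every name except perf_sample__{init,exit}
 * is a placeholder.
 */
static int synth_sample_skeleton(void)
{
	struct perf_sample sample;
	int ret;

	perf_sample__init(&sample, /*all=*/true);
	/* ... fill sample fields, then deliver the event + sample ... */
	ret = 0;
	perf_sample__exit(&sample);	/* frees anything init/fill allocated */
	return ret;
}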
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 30be6dfe09eb..4e8a9b172fbc 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1764,12 +1764,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct dummy_branch_stack {
u64 nr;
u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
+ int ret;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
@@ -1777,6 +1778,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_b_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->branches_id;
@@ -1806,8 +1808,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
- return intel_pt_deliver_synth_event(pt, event, &sample,
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
pt->branches_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1835,11 +1839,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->instructions_id;
@@ -1859,16 +1865,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->instructions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->instructions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
u64 period = 0;
+ int ret;
if (ptq->sample_ipc)
period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
@@ -1876,6 +1885,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
if (!period || intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cycles_id;
@@ -1887,25 +1897,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->transactions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->transactions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static void intel_pt_prep_p_sample(struct intel_pt *pt,
@@ -1953,15 +1969,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_cbr raw;
u32 flags;
+ int ret;
if (intel_pt_skip_cbr_event(pt))
return 0;
ptq->cbr_seen = ptq->state->cbr;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cbr_id;
@@ -1975,20 +1993,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_psb raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->psb_id;
@@ -2001,20 +2023,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_mwait raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->mwait_id;
@@ -2026,20 +2052,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwre raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwre_id;
@@ -2051,20 +2081,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_exstop raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->exstop_id;
@@ -2076,20 +2110,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwrx raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwrx_id;
@@ -2101,8 +2139,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
/*
@@ -2235,16 +2275,18 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
union perf_event *event = ptq->event_buf;
struct intel_pt *pt = ptq->pt;
u64 sample_type = evsel->core.attr.sample_type;
u8 cpumode;
- u64 regs[8 * sizeof(sample.intr_regs.mask)];
+ u64 regs[8 * sizeof(sample.intr_regs->mask)];
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_a_sample(ptq, event, &sample);
sample.id = id;
@@ -2291,15 +2333,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
items->mask[INTEL_PT_XMM_POS])) {
u64 regs_mask = evsel->core.attr.sample_regs_intr;
u64 *pos;
+ struct regs_dump *intr_regs = perf_sample__intr_regs(&sample);
- sample.intr_regs.abi = items->is_32_bit ?
+ intr_regs->abi = items->is_32_bit ?
PERF_SAMPLE_REGS_ABI_32 :
PERF_SAMPLE_REGS_ABI_64;
- sample.intr_regs.regs = regs;
+ intr_regs->regs = regs;
- pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+ pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask);
- intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+ intel_pt_add_xmm(intr_regs, pos, items, regs_mask);
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -2361,7 +2404,9 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
@@ -2407,16 +2452,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct {
struct perf_synth_intel_evt cfe;
struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
} raw;
- int i;
+ int i, ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->evt_id;
@@ -2438,20 +2484,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->evt_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_iflag_chg raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->iflag_chg_id;
@@ -2471,8 +2521,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->iflag_chg_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
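/*
 * Sketch of the intr_regs access pattern above: the regs_dump is no
 * longer embedded in perf_sample, so it is fetched via
 * perf_sample__intr_regs() and filled through the pointer. Reduced
 * from the PEBS hunk above; not the patch's code.
 */
static void fill_intr_regs(struct perf_sample *sample, u64 *regs, bool is_32_bit)
{
	struct regs_dump *intr_regs = perf_sample__intr_regs(sample);

	intr_regs->abi = is_32_bit ? PERF_SAMPLE_REGS_ABI_32 :
				     PERF_SAMPLE_REGS_ABI_64;
	intr_regs->regs = regs;
}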
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index 50a3c3e07160..2c421b475b3b 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -254,7 +254,7 @@ int tpebs_start(struct evlist *evsel_list)
new = zalloc(sizeof(*new));
if (!new) {
ret = -1;
- zfree(name);
+ zfree(&name);
goto err;
}
new->name = name;
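/*
 * The one-line fix above in miniature: zfree() takes the address of the
 * pointer so it can both free and NULL it; passing the pointer itself
 * (zfree(name)) would misinterpret the string bytes as a pointer.
 */
static void zfree_example(void)
{
	char *name = strdup("event");

	zfree(&name);	/* frees the string and leaves name == NULL */
}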
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index f23e21502bf8..624964f01b5f 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -516,7 +516,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
* create pseudo sample to induce dso hit increment
* use first address as sample address
*/
- memset(&sample, 0, sizeof(sample));
+ perf_sample__init(&sample, /*all=*/true);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
@@ -535,6 +535,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
out:
+ perf_sample__exit(&sample);
free(event);
return ret;
}
@@ -611,7 +612,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
* create pseudo sample to induce dso hit increment
* use first address as sample address
*/
- memset(&sample, 0, sizeof(sample));
+ perf_sample__init(&sample, /*all=*/true);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
@@ -620,12 +621,13 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
if (ret)
- return ret;
+ goto out;
ret = jit_inject_event(jd, event);
if (!ret)
build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
-
+out:
+ perf_sample__exit(&sample);
return ret;
}
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index a09f7fe877df..b5d916aa49df 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -168,6 +168,8 @@ int lock_contention_stop(void);
int lock_contention_read(struct lock_contention *con);
int lock_contention_finish(struct lock_contention *con);
+struct lock_stat *pop_owner_stack_trace(struct lock_contention *con);
+
#else /* !HAVE_BPF_SKEL */
static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
@@ -187,6 +189,11 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse
return 0;
}
+static inline struct lock_stat *pop_owner_stack_trace(struct lock_contention *con __maybe_unused)
+{
+ return NULL;
+}
+
#endif /* HAVE_BPF_SKEL */
#endif /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index af9a97612f9d..bbcd2ffcf4bd 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -32,7 +32,7 @@ static const char *lzma_strerror(lzma_ret ret)
}
}
-int lzma_decompress_to_file(const char *input, int output_fd)
+int lzma_decompress_stream_to_file(FILE *infile, int output_fd)
{
lzma_action action = LZMA_RUN;
lzma_stream strm = LZMA_STREAM_INIT;
@@ -41,18 +41,11 @@ int lzma_decompress_to_file(const char *input, int output_fd)
u8 buf_in[BUFSIZE];
u8 buf_out[BUFSIZE];
- FILE *infile;
-
- infile = fopen(input, "rb");
- if (!infile) {
- pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
- return -1;
- }
ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
if (ret != LZMA_OK) {
pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret);
- goto err_fclose;
+ return err;
}
strm.next_in = NULL;
@@ -100,11 +93,25 @@ int lzma_decompress_to_file(const char *input, int output_fd)
err = 0;
err_lzma_end:
lzma_end(&strm);
-err_fclose:
- fclose(infile);
return err;
}
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+ FILE *infile;
+ int ret;
+
+ infile = fopen(input, "rb");
+ if (!infile) {
+ pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
+ return -1;
+ }
+
+ ret = lzma_decompress_stream_to_file(infile, output_fd);
+ fclose(infile);
+ return ret;
+}
+
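/*
 * A sketch of what the stream-based split above enables: decompressing
 * from an in-memory buffer through POSIX fmemopen(), with no temporary
 * file. The buffer arguments are illustrative.
 */
static int lzma_decompress_buf_example(void *buf, size_t len, int output_fd)
{
	FILE *f = fmemopen(buf, len, "rb");
	int ret;

	if (!f)
		return -1;
	ret = lzma_decompress_stream_to_file(f, output_fd);
	fclose(f);
	return ret;
}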
bool lzma_is_compressed(const char *input)
{
int fd = open(input, O_RDONLY);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2d51badfbf2e..2531b373f2cf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -37,6 +37,7 @@
#include <internal/lib.h> // page_size
#include "cgroup.h"
#include "arm64-frame-pointer-unwind-support.h"
+#include <api/io_dir.h>
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -94,6 +95,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
machine->vmlinux_map = NULL;
+ /* There is no initial context-switch-in event, so we start at 1. */
+ machine->parallelism = 1;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@@ -677,8 +680,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
if (dump_trace)
perf_event__fprintf_switch(event, stdout);
+ machine->parallelism += out ? -1 : 1;
return 0;
}
@@ -712,7 +718,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map__set_start(map, event->ksymbol.addr);
map__set_end(map, map__start(map) + event->ksymbol.len);
- err = maps__insert(machine__kernel_maps(machine), map);
+ err = maps__fixup_overlap_and_insert(machine__kernel_maps(machine), map);
if (err) {
err = -ENOMEM;
goto out;
@@ -773,6 +779,10 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
if (dump_trace)
perf_event__fprintf_ksymbol(event, stdout);
+ /* no need to process non-JIT BPF as it cannot get samples */
+ if (event->ksymbol.len == 0)
+ return 0;
+
if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
return machine__process_ksymbol_unregister(machine, event,
sample);
@@ -886,26 +896,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
return ret;
}
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
-{
- int i;
- size_t printed = 0;
- struct dso *kdso = machine__kernel_dso(machine);
-
- if (dso__has_build_id(kdso)) {
- char filename[PATH_MAX];
-
- if (dso__build_id_filename(kdso, filename, sizeof(filename), false))
- printed += fprintf(fp, "[0] %s\n", filename);
- }
-
- for (i = 0; i < vmlinux_path__nr_entries; ++i) {
- printed += fprintf(fp, "[%d] %s\n", i + dso__has_build_id(kdso),
- vmlinux_path[i]);
- }
- return printed;
-}
-
struct machine_fprintf_cb_args {
FILE *fp;
size_t printed;
@@ -1352,27 +1342,24 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
return 0;
}
-static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth)
+static int maps__set_modules_path_dir(struct maps *maps, char *path, size_t path_size, int depth)
{
- struct dirent *dent;
- DIR *dir = opendir(dir_name);
+ struct io_dirent64 *dent;
+ struct io_dir iod;
+ size_t root_len = strlen(path);
int ret = 0;
- if (!dir) {
- pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+ io_dir__init(&iod, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (iod.dirfd < 0) {
+ pr_debug("%s: cannot open %s dir\n", __func__, path);
return -1;
}
-
- while ((dent = readdir(dir)) != NULL) {
- char path[PATH_MAX];
- struct stat st;
-
- /*sshfs might return bad dent->d_type, so we have to stat*/
- path__join(path, sizeof(path), dir_name, dent->d_name);
- if (stat(path, &st))
- continue;
-
- if (S_ISDIR(st.st_mode)) {
+ /* Bounds check, should never happen. */
+ if (root_len >= path_size)
+ return -1;
+ path[root_len++] = '/';
+ while ((dent = io_dir__readdir(&iod)) != NULL) {
+ if (io_dir__is_dir(&iod, dent)) {
if (!strcmp(dent->d_name, ".") ||
!strcmp(dent->d_name, ".."))
continue;
@@ -1384,7 +1371,12 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
continue;
}
- ret = maps__set_modules_path_dir(maps, path, depth + 1);
+ /* Bounds check, should never happen. */
+ if (root_len + strlen(dent->d_name) >= path_size)
+ continue;
+
+ strcpy(path + root_len, dent->d_name);
+ ret = maps__set_modules_path_dir(maps, path, path_size, depth + 1);
if (ret < 0)
goto out;
} else {
@@ -1394,9 +1386,14 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
if (ret)
goto out;
- if (m.kmod)
- ret = maps__set_module_path(maps, path, &m);
+ if (m.kmod) {
+ /* Bounds check, should never happen. */
+ if (root_len + strlen(dent->d_name) < path_size) {
+ strcpy(path + root_len, dent->d_name);
+ ret = maps__set_module_path(maps, path, &m);
+ }
+ }
zfree(&m.name);
if (ret)
@@ -1405,7 +1402,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
}
out:
- closedir(dir);
+ close(iod.dirfd);
return ret;
}
@@ -1422,7 +1419,8 @@ static int machine__set_modules_path(struct machine *machine)
machine->root_dir, version);
free(version);
- return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
+ return maps__set_modules_path_dir(machine__kernel_maps(machine),
+ modules_path, sizeof(modules_path), 0);
}
int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
u64 *size __maybe_unused,
@@ -1468,8 +1466,6 @@ static int machine__create_modules(struct machine *machine)
if (modules__parse(modules, machine, machine__create_module))
return -1;
- maps__fixup_end(machine__kernel_maps(machine));
-
if (!machine__set_modules_path(machine))
return 0;
@@ -1563,6 +1559,8 @@ int machine__create_kernel_maps(struct machine *machine)
}
}
+ maps__fixup_end(machine__kernel_maps(machine));
+
out_put:
dso__put(kernel);
return ret;
@@ -1900,6 +1898,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
+ /* There is no context switch out before exit, so we decrement here. */
+ machine->parallelism--;
if (thread != NULL) {
if (symbol_conf.keep_exited_threads)
thread__set_exited(thread, /*exited=*/true);
@@ -2929,8 +2929,8 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
return 0;
/* Bail out if nothing was captured. */
- if ((!sample->user_regs.regs) ||
- (!sample->user_stack.size))
+ if (!sample->user_regs || !sample->user_regs->regs ||
+ !sample->user_stack.size)
return 0;
if (!symbols)
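/*
 * The parallelism bookkeeping added above, reduced to its rules:
 * switch-in increments, switch-out decrements, and a task exit
 * decrements once because no switch-out event follows it.
 */
static void parallelism_update(struct machine *machine, bool switch_out)
{
	machine->parallelism += switch_out ? -1 : 1;
}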
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 2e5a4cb342d8..b56abec84fed 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -50,6 +50,12 @@ struct machine {
u64 text_start;
u64 text_end;
} sched, lock, traceiter, trace;
+ /*
+ * The current parallelism level (number of threads that run on CPUs).
+ * This value can be less than 1, or larger than the total number
+ * of CPUs, if events are poorly ordered.
+ */
+ int parallelism;
pid_t *current_tid;
size_t current_tid_sz;
union { /* Tool specific area */
@@ -266,8 +272,6 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid);
int machines__create_guest_kernel_maps(struct machines *machines);
void machines__destroy_kernel_maps(struct machines *machines);
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
-
typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 09c9cc326c08..0b40d901675e 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -428,11 +428,29 @@ static unsigned int maps__by_name_index(const struct maps *maps, const struct ma
return -1;
}
+static void map__set_kmap_maps(struct map *map, struct maps *maps)
+{
+ struct dso *dso;
+
+ if (map == NULL)
+ return;
+
+ dso = map__dso(map);
+
+ if (dso && dso__kernel(dso)) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+}
+
static int __maps__insert(struct maps *maps, struct map *new)
{
struct map **maps_by_address = maps__maps_by_address(maps);
struct map **maps_by_name = maps__maps_by_name(maps);
- const struct dso *dso = map__dso(new);
unsigned int nr_maps = maps__nr_maps(maps);
unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated;
@@ -483,14 +501,9 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- if (dso && dso__kernel(dso)) {
- struct kmap *kmap = map__kmap(new);
- if (kmap)
- kmap->kmaps = maps;
- else
- pr_err("Internal error: kernel dso with non kernel map\n");
- }
+ map__set_kmap_maps(new, maps);
+
return 0;
}
@@ -785,6 +798,9 @@ static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_ind
}
RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add;
maps__set_maps_by_name_sorted(maps, false);
+ map__set_kmap_maps(new1, maps);
+ map__set_kmap_maps(new2, maps);
+
check_invariants(maps);
return 0;
}
@@ -797,7 +813,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
{
int err = 0;
FILE *fp = debug_file();
- unsigned int i;
+ unsigned int i, ni = INT_MAX; // Some gcc versions complain, but ni is only used when maps_by_name is set.
if (!maps__maps_by_address_sorted(maps))
__maps__sort_by_address(maps);
@@ -808,6 +824,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) {
struct map **maps_by_address = maps__maps_by_address(maps);
+ struct map **maps_by_name = maps__maps_by_name(maps);
struct map *pos = maps_by_address[i];
struct map *before = NULL, *after = NULL;
@@ -827,6 +844,9 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
map__fprintf(pos, fp);
}
+ if (maps_by_name)
+ ni = maps__by_name_index(maps, pos);
+
/*
* Now check if we need to create new maps for areas not
* overlapped by the new map:
@@ -871,6 +891,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(before);
+ }
+
/* Maps are still ordered, go to next one. */
i++;
if (after) {
@@ -892,6 +918,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(new);
+ }
+
err = __maps__insert_sorted(maps, i + 1, after, NULL);
map__put(after);
check_invariants(maps);
@@ -910,6 +942,14 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(new);
+ }
+
+ map__set_kmap_maps(new, maps);
+
check_invariants(maps);
return err;
}
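/*
 * The replacement pattern repeated above, assuming this file's
 * reference-counting helpers: when a map changes in maps_by_address,
 * the maps_by_name slot (index ni, when the name array exists) must be
 * updated in lock step or it keeps a stale reference.
 */
static void replace_map_at(struct map **maps_by_address, struct map **maps_by_name,
			   unsigned int i, unsigned int ni, struct map *new)
{
	map__put(maps_by_address[i]);
	maps_by_address[i] = map__get(new);
	if (maps_by_name) {
		map__put(maps_by_name[ni]);
		maps_by_name[ni] = map__get(new);
	}
}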
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 3692e988c86e..884d9aebce91 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -31,9 +31,6 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };
-static char mem_loads_name[100];
-static char mem_stores_name[100];
-
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
{
if (i >= PERF_MEM_EVENTS__MAX || !pmu)
@@ -81,7 +78,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
return num;
}
-static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
+static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
+ char *buf, size_t buf_size)
{
struct perf_mem_event *e;
@@ -96,31 +94,31 @@ static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
if (e->ldlat) {
if (!e->aux_event) {
/* ARM and Most of Intel */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name,
perf_mem_events__loads_ldlat);
} else {
/* Intel with mem-loads-aux event */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name, pmu->name,
perf_mem_events__loads_ldlat);
}
} else {
if (!e->aux_event) {
/* AMD and POWER */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name);
- } else
+ } else {
return NULL;
+ }
}
-
- return mem_loads_name;
+ return buf;
}
if (i == PERF_MEM_EVENTS__STORE) {
- scnprintf(mem_stores_name, sizeof(mem_stores_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name);
- return mem_stores_name;
+ return buf;
}
return NULL;
@@ -189,7 +187,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu
if (!e->event_name)
return true;
- scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name);
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name);
return !stat(path, &st);
}
@@ -238,69 +236,87 @@ void perf_pmu__mem_events_list(struct perf_pmu *pmu)
int j;
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ char buf[128];
struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
fprintf(stderr, "%-*s%-*s%s",
e->tag ? 13 : 0,
e->tag ? : "",
e->tag && verbose > 0 ? 25 : 0,
- e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "",
+ e->tag && verbose > 0
+ ? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))
+ : "",
e->supported ? ": available\n" : "");
}
}
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out)
{
const char *mnt = sysfs__mount();
struct perf_pmu *pmu = NULL;
- struct perf_mem_event *e;
int i = *argv_nr;
- const char *s;
- char *copy;
struct perf_cpu_map *cpu_map = NULL;
- int ret;
+ size_t event_name_storage_size =
+ perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128;
+ size_t event_name_storage_remaining = event_name_storage_size;
+ char *event_name_storage = malloc(event_name_storage_size);
+ char *event_name_storage_ptr = event_name_storage;
+
+ if (!event_name_storage)
+ return -ENOMEM;
+ *event_name_storage_out = NULL;
while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- e = perf_pmu__mem_events_ptr(pmu, j);
+ const char *s;
+ struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
+ int ret;
if (!perf_mem_record[j])
continue;
if (!e->supported) {
+ char buf[128];
+
pr_err("failed: event '%s' not supported\n",
- perf_pmu__mem_events_name(j, pmu));
+ perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)));
+ free(event_name_storage);
return -1;
}
- s = perf_pmu__mem_events_name(j, pmu);
+ s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr,
+ event_name_storage_remaining);
if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
continue;
- copy = strdup(s);
- if (!copy)
- return -1;
-
rec_argv[i++] = "-e";
- rec_argv[i++] = copy;
+ rec_argv[i++] = event_name_storage_ptr;
+ event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1;
+ event_name_storage_ptr += strlen(event_name_storage_ptr) + 1;
ret = perf_cpu_map__merge(&cpu_map, pmu->cpus);
- if (ret < 0)
+ if (ret < 0) {
+ free(event_name_storage);
return ret;
+ }
}
}
if (cpu_map) {
- if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
+ struct perf_cpu_map *online = cpu_map__online();
+
+ if (!perf_cpu_map__equal(cpu_map, online)) {
char buf[200];
cpu_map__snprint(cpu_map, buf, sizeof(buf));
pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
}
+ perf_cpu_map__put(online);
perf_cpu_map__put(cpu_map);
}
*argv_nr = i;
+ *event_name_storage_out = event_name_storage;
return 0;
}
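/*
 * Sketch of the calling convention introduced above: rec_argv entries
 * now point into one caller-owned buffer rather than per-event
 * strdup()s, so a single free releases every event name. Names other
 * than perf_mem_events__record_args() are illustrative.
 */
static int record_args_example(const char **rec_argv, int *argv_nr)
{
	char *event_name_storage = NULL;
	int ret = perf_mem_events__record_args(rec_argv, argv_nr,
					       &event_name_storage);

	if (ret)
		return ret;
	/* ... run the record session using rec_argv ... */
	free(event_name_storage);
	return 0;
}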
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 8dc27db9fd52..a5c19d39ee37 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -38,7 +38,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
bool is_mem_loads_aux_event(struct evsel *leader);
void perf_pmu__mem_events_list(struct perf_pmu *pmu);
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **event_name_storage_out);
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 43b02293f1d2..a34726219af3 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -244,9 +244,8 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
int idx, nr_cpus;
struct perf_cpu cpu;
- const struct perf_cpu_map *cpu_map = NULL;
+ struct perf_cpu_map *cpu_map = cpu_map__online();
- cpu_map = cpu_map__online();
if (!cpu_map)
return;
@@ -256,6 +255,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
if (cpu__get_node(cpu) == node)
__set_bit(cpu.cpu, mask->bits);
}
+ perf_cpu_map__put(cpu_map);
}
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
@@ -356,14 +356,3 @@ int perf_mmap__push(struct mmap *md, void *to,
out:
return rc;
}
-
-int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone)
-{
- clone->nbits = original->nbits;
- clone->bits = bitmap_zalloc(original->nbits);
- if (!clone->bits)
- return -ENOMEM;
-
- memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original));
- return 0;
-}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 0df6e1621c7e..4d72c5fa5084 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -61,7 +61,4 @@ size_t mmap__mmap_len(struct mmap *map);
void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
-int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original,
- struct mmap_cpu_mask *clone);
-
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
index 40661120cacc..62d258c71ded 100644
--- a/tools/perf/util/mutex.h
+++ b/tools/perf/util/mutex.h
@@ -33,6 +33,12 @@
/* Documents if a type is a lockable type. */
#define LOCKABLE __attribute__((lockable))
+/* Documents a function that expects a lock not to be held prior to entry. */
+#define LOCKS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__)))
+
+/* Documents a function that returns a lock. */
+#define LOCK_RETURNED(x) __attribute__((lock_returned(x)))
+
/* Documents functions that acquire a lock in the body of a function, and do not release it. */
#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
@@ -57,6 +63,8 @@
#define GUARDED_BY(x)
#define PT_GUARDED_BY(x)
#define LOCKABLE
+#define LOCKS_EXCLUDED(...)
+#define LOCK_RETURNED(x)
#define EXCLUSIVE_LOCK_FUNCTION(...)
#define UNLOCK_FUNCTION(...)
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
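/*
 * How the new annotations read at a hypothetical use site (not code
 * from this patch); GUARDED_BY and mutex_lock/mutex_unlock already
 * exist in this header's API.
 */
struct counter {
	struct mutex lock;
	int value GUARDED_BY(lock);
};

/* Callers must not already hold c->lock. */
static void counter__bump(struct counter *c) LOCKS_EXCLUDED(c->lock)
{
	mutex_lock(&c->lock);
	c->value++;
	mutex_unlock(&c->lock);
}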
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 1e23faa364b1..5152fd5a6ead 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -17,6 +17,7 @@
#include "strbuf.h"
#include "debug.h"
#include <api/fs/tracing_path.h>
+#include <api/io_dir.h>
#include <perf/cpumap.h>
#include <util/parse-events-bison.h>
#include <util/parse-events-flex.h>
@@ -554,8 +555,8 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
char *evt_path;
- struct dirent *evt_ent;
- DIR *evt_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir evt_dir;
int ret = 0, found = 0;
evt_path = get_events_file(sys_name);
@@ -563,14 +564,14 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- evt_dir = opendir(evt_path);
- if (!evt_dir) {
+ io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (evt_dir.dirfd < 0) {
put_events_file(evt_path);
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- while (!ret && (evt_ent = readdir(evt_dir))) {
+ while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) {
if (!strcmp(evt_ent->d_name, ".")
|| !strcmp(evt_ent->d_name, "..")
|| !strcmp(evt_ent->d_name, "enable")
@@ -592,7 +593,7 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
}
put_events_file(evt_path);
- closedir(evt_dir);
+ close(evt_dir.dirfd);
return ret;
}
@@ -615,17 +616,23 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
struct parse_events_error *err,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
- struct dirent *events_ent;
- DIR *events_dir;
+ struct io_dirent64 *events_ent;
+ struct io_dir events_dir;
int ret = 0;
+ char *events_dir_path = get_tracing_file("events");
- events_dir = tracing_events__opendir();
- if (!events_dir) {
+ if (!events_dir_path) {
+ tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
+ return -1;
+ }
+ io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ put_events_file(events_dir_path);
+ if (events_dir.dirfd < 0) {
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- while (!ret && (events_ent = readdir(events_dir))) {
+ while (!ret && (events_ent = io_dir__readdir(&events_dir))) {
if (!strcmp(events_ent->d_name, ".")
|| !strcmp(events_ent->d_name, "..")
|| !strcmp(events_ent->d_name, "enable")
@@ -639,8 +646,7 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
ret = add_tracepoint_event(parse_state, list, events_ent->d_name,
evt_name, err, head_config, loc);
}
-
- closedir(events_dir);
+ close(events_dir.dirfd);
return ret;
}
@@ -1660,7 +1666,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
if (!parse_events__filter_pmu(parse_state, pmu) &&
- perf_pmu__match(pmu, event_or_pmu)) {
+ perf_pmu__wildcard_match(pmu, event_or_pmu)) {
bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
if (!parse_events_add_pmu(parse_state, *listp, pmu,
@@ -1974,48 +1980,55 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
int *force_grouped_idx = _fg_idx;
int lhs_sort_idx, rhs_sort_idx, ret;
const char *lhs_pmu_name, *rhs_pmu_name;
- bool lhs_has_group, rhs_has_group;
/*
- * First sort by grouping/leader. Read the leader idx only if the evsel
- * is part of a group, by default ungrouped events will be sorted
- * relative to grouped events based on where the first ungrouped event
- * occurs. If both events don't have a group we want to fall-through to
- * the arch specific sorting, that can reorder and fix things like
- * Intel's topdown events.
+ * Get the indexes of the 2 events to sort. If the events are
+ * in groups then the leader's index is used otherwise the
+ * event's index is used. An index may be forced for events that
+ * must be in the same group, namely Intel topdown events.
*/
- if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
- lhs_has_group = true;
- lhs_sort_idx = lhs_core->leader->idx;
+ if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
+ lhs_sort_idx = *force_grouped_idx;
} else {
- lhs_has_group = false;
- lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
- ? *force_grouped_idx
- : lhs_core->idx;
- }
- if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
- rhs_has_group = true;
- rhs_sort_idx = rhs_core->leader->idx;
+ bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
+
+ lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx;
+ }
+ if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
+ rhs_sort_idx = *force_grouped_idx;
} else {
- rhs_has_group = false;
- rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
- ? *force_grouped_idx
- : rhs_core->idx;
+ bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
+
+ rhs_sort_idx = rhs_has_group ? rhs_core->leader->idx : rhs_core->idx;
}
+ /* If the indices differ then respect the insertion order. */
if (lhs_sort_idx != rhs_sort_idx)
return lhs_sort_idx - rhs_sort_idx;
- /* Group by PMU if there is a group. Groups can't span PMUs. */
- if (lhs_has_group && rhs_has_group) {
- lhs_pmu_name = lhs->group_pmu_name;
- rhs_pmu_name = rhs->group_pmu_name;
- ret = strcmp(lhs_pmu_name, rhs_pmu_name);
- if (ret)
- return ret;
- }
+ /*
+ * Ignoring forcing, lhs_sort_idx == rhs_sort_idx so lhs and rhs should
+ * be in the same group. Events in the same group need to be ordered by
+ * their grouping PMU name as the group will be broken to ensure only
+ * events on the same PMU are programmed together.
+ *
+ * With forcing the lhs_sort_idx == rhs_sort_idx shows that one or both
+ * events are being forced to be at force_group_index. If only one event
+ * is being forced then the other event is the group leader of the group
+ * we're trying to force the event into. Ensure for the force grouped
+ * case that the PMU name ordering is also respected.
+ */
+ lhs_pmu_name = lhs->group_pmu_name;
+ rhs_pmu_name = rhs->group_pmu_name;
+ ret = strcmp(lhs_pmu_name, rhs_pmu_name);
+ if (ret)
+ return ret;
- /* Architecture specific sorting. */
+ /*
+ * Architecture specific sorting, by default sort events in the same
+ * group with the same PMU by their insertion index. On Intel topdown
+ * constraints must be adhered to - slots first, etc.
+ */
return arch_evlist__cmp(lhs, rhs);
}
@@ -2024,9 +2037,11 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
int idx = 0, force_grouped_idx = -1;
struct evsel *pos, *cur_leader = NULL;
struct perf_evsel *cur_leaders_grp = NULL;
- bool idx_changed = false, cur_leader_force_grouped = false;
+ bool idx_changed = false;
int orig_num_leaders = 0, num_leaders = 0;
int ret;
+ struct evsel *force_grouped_leader = NULL;
+ bool last_event_was_forced_leader = false;
/*
* Compute index to insert ungrouped events at. Place them where the
@@ -2049,10 +2064,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
pos->core.idx = idx++;
- /* Remember an index to sort all forced grouped events together to. */
- if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
- arch_evsel__must_be_in_group(pos))
- force_grouped_idx = pos->core.idx;
+ /*
+ * Remember an index to sort all forced grouped events
+ * together to. Use the group leader as some events
+ * must appear first within the group.
+ */
+ if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos))
+ force_grouped_idx = pos_leader->core.idx;
}
/* Sort events. */
@@ -2080,31 +2098,66 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
* Set the group leader respecting the given groupings and that
* groups can't span PMUs.
*/
- if (!cur_leader)
+ if (!cur_leader) {
cur_leader = pos;
+ cur_leaders_grp = &pos->core;
+ if (pos_force_grouped)
+ force_grouped_leader = pos;
+ }
cur_leader_pmu_name = cur_leader->group_pmu_name;
- if ((cur_leaders_grp != pos->core.leader &&
- (!pos_force_grouped || !cur_leader_force_grouped)) ||
- strcmp(cur_leader_pmu_name, pos_pmu_name)) {
- /* Event is for a different group/PMU than last. */
+ if (strcmp(cur_leader_pmu_name, pos_pmu_name)) {
+ /* PMU changed so the group/leader must change. */
cur_leader = pos;
- /*
- * Remember the leader's group before it is overwritten,
- * so that later events match as being in the same
- * group.
- */
cur_leaders_grp = pos->core.leader;
+ if (pos_force_grouped && force_grouped_leader == NULL)
+ force_grouped_leader = pos;
+ } else if (cur_leaders_grp != pos->core.leader) {
+ bool split_even_if_last_leader_was_forced = true;
+
/*
- * Avoid forcing events into groups with events that
- * don't need to be in the group.
+ * Event is for a different group. If the last event was
+ * the forced group leader then subsequent group events
+ * and forced events should be in the same group. If
+ * there are no other forced group events then the
+ * forced group leader wasn't really being forced into a
+ * group, it just set arch_evsel__must_be_in_group, and
+ * we don't want the group to split here.
*/
- cur_leader_force_grouped = pos_force_grouped;
+ if (force_grouped_idx != -1 && last_event_was_forced_leader) {
+ struct evsel *pos2 = pos;
+ /*
+ * Search the whole list as the group leaders
+ * aren't currently valid.
+ */
+ list_for_each_entry_continue(pos2, list, core.node) {
+ if (pos->core.leader == pos2->core.leader &&
+ arch_evsel__must_be_in_group(pos2)) {
+ split_even_if_last_leader_was_forced = false;
+ break;
+ }
+ }
+ }
+ if (!last_event_was_forced_leader || split_even_if_last_leader_was_forced) {
+ if (pos_force_grouped) {
+ if (force_grouped_leader) {
+ cur_leader = force_grouped_leader;
+ cur_leaders_grp = force_grouped_leader->core.leader;
+ } else {
+ cur_leader = force_grouped_leader = pos;
+ cur_leaders_grp = &pos->core;
+ }
+ } else {
+ cur_leader = pos;
+ cur_leaders_grp = pos->core.leader;
+ }
+ }
}
if (pos_leader != cur_leader) {
/* The leader changed so update it. */
evsel__set_leader(pos, cur_leader);
}
+ last_event_was_forced_leader = (force_grouped_leader == pos);
}
list_for_each_entry(pos, list, core.node) {
struct evsel *pos_leader = evsel__leader(pos);
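
To illustrate the PMU-boundary rule in the hunks above: groups cannot span
PMUs, so a change of group_pmu_name always starts a new group and leader.
A minimal standalone sketch of just that rule, using a simplified event
struct in place of struct evsel (the forced-grouping cases are omitted):

	#include <stdio.h>
	#include <string.h>

	struct ev {
		const char *name;
		const char *pmu;	/* group_pmu_name */
		int leader;		/* index of the group leader */
	};

	int main(void)
	{
		struct ev evs[] = {
			{ "cycles",        "cpu", 0 },
			{ "instructions",  "cpu", 0 },
			{ "uncore_reads",  "unc", 0 },
			{ "uncore_writes", "unc", 0 },
		};
		int cur_leader = 0;

		for (int i = 0; i < 4; i++) {
			/* A PMU change always starts a new group. */
			if (strcmp(evs[i].pmu, evs[cur_leader].pmu))
				cur_leader = i;
			evs[i].leader = cur_leader;
		}
		for (int i = 0; i < 4; i++)
			printf("%-13s -> leader %s\n",
			       evs[i].name, evs[evs[i].leader].name);
		return 0;
	}
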
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index bf7f73548605..7ed86e3e34e3 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,21 +53,25 @@ static int str(yyscan_t scanner, int token)
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
- if (text[0] != '\'') {
- yylval->str = strdup(text);
- } else {
- /*
- * If a text tag specified on the command line
- * contains opening single quite ' then it is
- * expected that the tag ends with single quote
- * as well, like this:
- * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
- * quotes need to be escaped to bypass shell
- * processing.
- */
- yylval->str = strndup(&text[1], strlen(text) - 2);
- }
+ yylval->str = strdup(text);
+ return token;
+}
+
+static int quoted_str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+ /*
+ * If a text tag specified on the command line
+ * contains an opening single quote ' then it is
+ * expected that the tag ends with single quote
+ * as well, like this:
+ * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+ * quotes need to be escaped to bypass shell
+ * processing.
+ */
+ yylval->str = strndup(&text[1], strlen(text) - 2);
return token;
}
@@ -235,9 +239,16 @@ event [^,{}/]+
num_dec [0-9]+
num_hex 0x[a-fA-F0-9]{1,16}
num_raw_hex [a-fA-F0-9]{1,16}
-name [a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]*
-name_tag [\'][a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
-name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+/* Regular pattern to match the token PE_NAME. */
+name_start [a-zA-Z0-9_*?\[\]]
+name {name_start}[a-zA-Z0-9_*?.\[\]!\-]*
+/* PE_NAME token when inside a config term list, allows ':'. */
+term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]*
+/*
+ * PE_NAME token when quoted, allows ':,.='.
+ * Matches the RHS of terms like: name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'.
+ */
+quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\']
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/*
* If you add a modifier you need to update check_modifier().
@@ -341,7 +352,9 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
{lc_type} { return lc_str(yyscanner, _parse_state); }
{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
-{name_minus} { return str(yyscanner, PE_NAME); }
+{num_dec} { return value(_parse_state, yyscanner, 10); }
+{num_hex} { return value(_parse_state, yyscanner, 16); }
+{term_name} { return str(yyscanner, PE_NAME); }
@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
}
@@ -410,7 +423,7 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); }
{modifier_event} { return modifiers(_parse_state, yyscanner); }
{name} { return str(yyscanner, PE_NAME); }
-{name_tag} { return str(yyscanner, PE_NAME); }
+{quoted_name} { return quoted_str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
, { BEGIN(event); return ','; }
: { return ':'; }
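
The new quoted_str() handler simply drops the surrounding quotes with
strndup(); the {quoted_name} regex guarantees both quotes are present, so
no extra length checks are needed. A standalone sketch (quoted_name_strip()
is an invented helper, not a perf function):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Strip one leading and one trailing single quote. */
	static char *quoted_name_strip(const char *text)
	{
		return strndup(&text[1], strlen(text) - 2);
	}

	int main(void)
	{
		char *s = quoted_name_strip("'CPU_CLK_UNHALTED.THREAD:cmask=1'");

		puts(s);	/* CPU_CLK_UNHALTED.THREAD:cmask=1 */
		free(s);
		return 0;
	}
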
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index c7f3543b9921..66b666d9ce64 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type)
{
- if (pmu)
- return pmu->name;
-
switch (type) {
ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
- ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
+ case PERF_TYPE_RAW:
+ return pmu ? pmu->name : "PERF_TYPE_RAW";
default:
- return NULL;
+ return pmu ? pmu->name : NULL;
}
}
static const char *stringify_perf_hw_id(u64 value)
{
- switch (value) {
+ switch (value & PERF_HW_EVENT_MASK) {
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
@@ -169,79 +167,100 @@ static const char *stringify_perf_sw_id(u64 value)
}
#undef ENUM_ID_TO_STR_CASE
-#define PRINT_ID(_s, _f) \
-do { \
- const char *__s = _s; \
- if (__s == NULL) \
- snprintf(buf, size, _f, value); \
- else \
- snprintf(buf, size, _f" (%s)", value, __s); \
-} while (0)
-#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64)
-#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64)
+static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s)
+{
+ if (s == NULL)
+ snprintf(buf, size, "%"PRIu64, value);
+ else
+ snprintf(buf, size, "%"PRIu64" (%s)", value, s);
+}
+
+static void print_id_hex(char *buf, size_t size, u64 value, const char *s)
+{
+ if (s == NULL)
+ snprintf(buf, size, "%#"PRIx64, value);
+ else
+ snprintf(buf, size, "%#"PRIx64" (%s)", value, s);
+}
-static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value)
+static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type)
{
- print_id_unsigned(stringify_perf_type_id(pmu, value));
+ print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type));
}
-static void __p_config_hw_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
{
- print_id_hex(stringify_perf_hw_id(value));
+ const char *name = stringify_perf_hw_id(config);
+
+ if (name == NULL) {
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64, config);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+ config);
+ }
+ } else {
+ if (pmu == NULL)
+ snprintf(buf, size, "%#"PRIx64" (%s)", config, name);
+ else
+ snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name);
+ }
}
-static void __p_config_sw_id(char *buf, size_t size, u64 value)
+static void __p_config_sw_id(char *buf, size_t size, u64 id)
{
- print_id_hex(stringify_perf_sw_id(value));
+ print_id_hex(buf, size, id, stringify_perf_sw_id(id));
}
-static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
{
- const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff);
+ const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff);
const char *hw_cache_op_str =
- stringify_perf_hw_cache_op_id((value & 0xff00) >> 8);
+ stringify_perf_hw_cache_op_id((config & 0xff00) >> 8);
const char *hw_cache_op_result_str =
- stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16);
-
- if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
- hw_cache_op_result_str == NULL) {
- snprintf(buf, size, "%#"PRIx64, value);
+ stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16);
+
+ if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) {
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64, config);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+ config);
+ }
} else {
- snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value,
- hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ }
}
}
-static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
+static void __p_config_tracepoint_id(char *buf, size_t size, u64 id)
{
- char *str = tracepoint_id_to_name(value);
+ char *str = tracepoint_id_to_name(id);
- print_id_hex(str);
+ print_id_hex(buf, size, id, str);
free(str);
}
-static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value)
+static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config)
{
- const char *name = perf_pmu__name_from_config(pmu, value);
-
- if (name) {
- print_id_hex(name);
- return;
- }
switch (type) {
case PERF_TYPE_HARDWARE:
- return __p_config_hw_id(buf, size, value);
+ return __p_config_hw_id(buf, size, pmu, config);
case PERF_TYPE_SOFTWARE:
- return __p_config_sw_id(buf, size, value);
+ return __p_config_sw_id(buf, size, config);
case PERF_TYPE_HW_CACHE:
- return __p_config_hw_cache_id(buf, size, value);
+ return __p_config_hw_cache_id(buf, size, pmu, config);
case PERF_TYPE_TRACEPOINT:
- return __p_config_tracepoint_id(buf, size, value);
+ return __p_config_tracepoint_id(buf, size, config);
case PERF_TYPE_RAW:
case PERF_TYPE_BREAKPOINT:
default:
- snprintf(buf, size, "%#"PRIx64, value);
- return;
+ return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config));
}
}
@@ -253,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
-#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val)
+#define p_type_id(val) __p_type_id(buf, BUF_SIZE, pmu, val)
#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val)
#define PRINT_ATTRn(_n, _f, _p, _a) \
@@ -273,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
char buf[BUF_SIZE];
int ret = 0;
+ if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) {
+ u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT;
+
+ if (extended_type)
+ pmu = perf_pmus__find_by_type(extended_type);
+ }
+
PRINT_ATTRn("type", type, p_type_id, true);
PRINT_ATTRf(size, p_unsigned);
PRINT_ATTRn("config", config, p_config_id, true);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6206c8fe2bf9..b7ebac5ab1d1 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -13,6 +13,7 @@
#include <dirent.h>
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <locale.h>
#include <fnmatch.h>
#include <math.h>
@@ -36,12 +37,12 @@
#define UNIT_MAX_LEN 31 /* max length for event unit name */
enum event_source {
- /* An event loaded from /sys/devices/<pmu>/events. */
+ /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */
EVENT_SRC_SYSFS,
/* An event loaded from a CPUID matched json file. */
EVENT_SRC_CPU_JSON,
/*
- * An event loaded from a /sys/devices/<pmu>/identifier matched json
+ * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json
* file.
*/
EVENT_SRC_SYS_JSON,
@@ -195,19 +196,17 @@ static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_fo
*/
static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load)
{
- struct dirent *evt_ent;
- DIR *format_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir format_dir;
int ret = 0;
- format_dir = fdopendir(dirfd);
- if (!format_dir)
- return -EINVAL;
+ io_dir__init(&format_dir, dirfd);
- while ((evt_ent = readdir(format_dir)) != NULL) {
+ while ((evt_ent = io_dir__readdir(&format_dir)) != NULL) {
struct perf_pmu_format *format;
char *name = evt_ent->d_name;
- if (!strcmp(name, ".") || !strcmp(name, ".."))
+ if (io_dir__is_dir(&format_dir, evt_ent))
continue;
format = perf_pmu__new_format(&pmu->format, name);
@@ -234,7 +233,7 @@ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_lo
}
}
- closedir(format_dir);
+ close(format_dir.dirfd);
return ret;
}
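
io_dir is a perf-internal helper (api/io_dir.h) that walks directory
entries over an already-open fd without the heap allocation that
fdopendir() implies. For reference, the portable pattern it replaces looks
roughly like this (error handling trimmed):

	#include <dirent.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Iterate the entries of an open directory fd. */
	static int list_dir_fd(int dirfd)
	{
		DIR *dir = fdopendir(dirfd);	/* takes ownership of dirfd */
		struct dirent *ent;

		if (!dir)
			return -1;
		while ((ent = readdir(dir)) != NULL) {
			if (ent->d_name[0] == '.')
				continue;	/* skip ".", ".." and hidden entries */
			puts(ent->d_name);
		}
		closedir(dir);
		return 0;
	}

	int main(void)
	{
		int fd = open("/sys/bus/event_source/devices",
			      O_RDONLY | O_DIRECTORY);

		return fd >= 0 ? list_dir_fd(fd) : 1;
	}
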
@@ -596,7 +595,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
};
if (pmu_events_table__find_event(pmu->events_table, pmu, name,
update_alias, &data) == 0)
- pmu->cpu_json_aliases++;
+ pmu->cpu_common_json_aliases++;
}
pmu->sysfs_aliases++;
break;
@@ -635,14 +634,12 @@ static inline bool pmu_alias_info_file(const char *name)
*/
static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
{
- struct dirent *evt_ent;
- DIR *event_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir event_dir;
- event_dir = fdopendir(events_dir_fd);
- if (!event_dir)
- return -EINVAL;
+ io_dir__init(&event_dir, events_dir_fd);
- while ((evt_ent = readdir(event_dir))) {
+ while ((evt_ent = io_dir__readdir(&event_dir))) {
char *name = evt_ent->d_name;
int fd;
FILE *file;
@@ -674,7 +671,6 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
fclose(file);
}
- closedir(event_dir);
pmu->sysfs_aliases_loaded = true;
return 0;
}
@@ -783,7 +779,7 @@ static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is
}
/* Nothing found, for core PMUs assume this means all CPUs. */
- return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL;
+ return is_core ? cpu_map__online() : NULL;
}
static bool pmu_is_uncore(int dirfd, const char *name)
@@ -847,21 +843,23 @@ static size_t pmu_deduped_name_len(const struct perf_pmu *pmu, const char *name,
}
/**
- * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any
- * trailing suffix? The Suffix must be in form
- * tok_{digits}, or tok{digits}.
+ * perf_pmu__match_wildcard - Does the pmu_name start with tok and then end,
+ * or continue only with a suffix? tok may contain
+ * part of a suffix.
* @pmu_name: The pmu_name with possible suffix.
- * @tok: The possible match to pmu_name without suffix.
+ * @tok: The wildcard argument to match.
*/
-static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok)
+static bool perf_pmu__match_wildcard(const char *pmu_name, const char *tok)
{
const char *p, *suffix;
bool has_hex = false;
+ size_t tok_len = strlen(tok);
- if (strncmp(pmu_name, tok, strlen(tok)))
+ /* Check start of pmu_name for equality. */
+ if (strncmp(pmu_name, tok, tok_len))
return false;
- suffix = p = pmu_name + strlen(tok);
+ suffix = p = pmu_name + tok_len;
if (*p == 0)
return true;
@@ -887,60 +885,84 @@ static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *to
}
/**
- * pmu_uncore_alias_match - does name match the PMU name?
- * @pmu_name: the json struct pmu_event name. This may lack a suffix (which
+ * perf_pmu__match_ignoring_suffix_uncore - Does the pmu_name match tok ignoring
+ * any trailing suffix on pmu_name and
+ * tok? The suffix must be of the form
+ * tok_{digits}, or tok{digits}.
+ * @pmu_name: The pmu_name with possible suffix.
+ * @tok: The possible match to pmu_name.
+ */
+static bool perf_pmu__match_ignoring_suffix_uncore(const char *pmu_name, const char *tok)
+{
+ size_t pmu_name_len, tok_len;
+
+ /* For robustness, check for NULL. */
+ if (pmu_name == NULL)
+ return tok == NULL;
+
+ /* uncore_ prefixes are ignored. */
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
+ if (!strncmp(tok, "uncore_", 7))
+ tok += 7;
+
+ pmu_name_len = pmu_name_len_no_suffix(pmu_name);
+ tok_len = pmu_name_len_no_suffix(tok);
+ if (pmu_name_len != tok_len)
+ return false;
+
+ return strncmp(pmu_name, tok, pmu_name_len) == 0;
+}
+
+/**
+ * perf_pmu__match_wildcard_uncore - Does to_match match the PMU's name?
+ * @pmu_name: The pmu->name or pmu->alias to match against.
+ * @to_match: the json struct pmu_event name. This may lack a suffix (which
* matches) or be of the form "socket,pmuname" which will match
* "socketX_pmunameY".
- * @name: a real full PMU name as from sysfs.
*/
-static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+static bool perf_pmu__match_wildcard_uncore(const char *pmu_name, const char *to_match)
{
- char *tmp = NULL, *tok, *str;
- bool res;
-
- if (strchr(pmu_name, ',') == NULL)
- return perf_pmu__match_ignoring_suffix(name, pmu_name);
+ char *mutable_to_match, *tok, *tmp;
- str = strdup(pmu_name);
- if (!str)
+ if (!pmu_name)
return false;
- /*
- * uncore alias may be from different PMU with common prefix
- */
- tok = strtok_r(str, ",", &tmp);
- if (strncmp(pmu_name, tok, strlen(tok))) {
- res = false;
- goto out;
- }
+ /* uncore_ prefixes are ignored. */
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
+ if (!strncmp(to_match, "uncore_", 7))
+ to_match += 7;
- /*
- * Match more complex aliases where the alias name is a comma-delimited
- * list of tokens, orderly contained in the matching PMU name.
- *
- * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we
- * match "socket" in "socketX_pmunameY" and then "pmuname" in
- * "pmunameY".
- */
- while (1) {
- char *next_tok = strtok_r(NULL, ",", &tmp);
+ if (strchr(to_match, ',') == NULL)
+ return perf_pmu__match_wildcard(pmu_name, to_match);
- name = strstr(name, tok);
- if (!name ||
- (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) {
- res = false;
- goto out;
+ /* Process comma separated list of PMU name components. */
+ mutable_to_match = strdup(to_match);
+ if (!mutable_to_match)
+ return false;
+
+ tok = strtok_r(mutable_to_match, ",", &tmp);
+ while (tok) {
+ size_t tok_len = strlen(tok);
+
+ if (strncmp(pmu_name, tok, tok_len)) {
+ /* Mismatch between part of pmu_name and tok. */
+ free(mutable_to_match);
+ return false;
}
- if (!next_tok)
- break;
- tok = next_tok;
- name += strlen(tok);
+ /* Move pmu_name forward over tok and suffix. */
+ pmu_name += tok_len;
+ while (*pmu_name != '\0' && isdigit(*pmu_name))
+ pmu_name++;
+ if (*pmu_name == '_')
+ pmu_name++;
+
+ tok = strtok_r(NULL, ",", &tmp);
}
-
- res = true;
-out:
- free(str);
- return res;
+ free(mutable_to_match);
+ return *pmu_name == '\0';
}
bool pmu_uncore_identifier_match(const char *compat, const char *id)
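
The comma-list walk in perf_pmu__match_wildcard_uncore() can be exercised
standalone. This sketch copies the algorithm onto plain C strings
(match_uncore_tokens() is an invented name):

	#include <ctype.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Match "socket,pmuname" style wildcards against "socket0_pmuname1". */
	static int match_uncore_tokens(const char *pmu_name, const char *to_match)
	{
		char *dup = strdup(to_match), *tmp, *tok;

		if (!dup)
			return 0;
		for (tok = strtok_r(dup, ",", &tmp); tok;
		     tok = strtok_r(NULL, ",", &tmp)) {
			size_t len = strlen(tok);

			if (strncmp(pmu_name, tok, len)) {
				free(dup);
				return 0;
			}
			/* Move pmu_name past the token and any suffix. */
			pmu_name += len;
			while (isdigit((unsigned char)*pmu_name))
				pmu_name++;
			if (*pmu_name == '_')
				pmu_name++;
		}
		free(dup);
		return *pmu_name == '\0';
	}

	int main(void)
	{
		printf("%d\n", match_uncore_tokens("socket0_pmuname1", "socket,pmuname")); /* 1 */
		printf("%d\n", match_uncore_tokens("socket0_other1", "socket,pmuname"));   /* 0 */
		return 0;
	}
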
@@ -1003,11 +1025,19 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
{
struct perf_pmu *pmu = vdata;
- if (!pe->compat || !pe->pmu)
+ if (!pe->compat || !pe->pmu) {
+ /* No data to match. */
+ return 0;
+ }
+
+ if (!perf_pmu__match_wildcard_uncore(pmu->name, pe->pmu) &&
+ !perf_pmu__match_wildcard_uncore(pmu->alias_name, pe->pmu)) {
+ /* PMU name/alias_name don't match. */
return 0;
+ }
- if (pmu_uncore_alias_match(pe->pmu, pmu->name) &&
- pmu_uncore_identifier_match(pe->compat, pmu->id)) {
+ if (pmu_uncore_identifier_match(pe->compat, pmu->id)) {
+ /* Id matched. */
perf_pmu__new_alias(pmu,
pe->name,
pe->desc,
@@ -1016,7 +1046,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
pe,
EVENT_SRC_SYS_JSON);
}
-
return 0;
}
@@ -1851,9 +1880,10 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
if (pmu->cpu_aliases_added)
nr += pmu->cpu_json_aliases;
else if (pmu->events_table)
- nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases;
+ nr += pmu_events_table__num_events(pmu->events_table, pmu) -
+ pmu->cpu_common_json_aliases;
else
- assert(pmu->cpu_json_aliases == 0);
+ assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0);
if (perf_pmu__is_tool(pmu))
nr -= tool_pmu__num_skip_events();
@@ -1974,15 +2004,82 @@ out:
return ret;
}
-bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name)
+static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_match, bool wildcard)
{
- return !strcmp(pmu->name, pmu_name) ||
- (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) ||
+ const char *names[2] = {
+ pmu->name,
+ pmu->alias_name,
+ };
+ if (pmu->is_core) {
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (!strcmp(name, to_match)) {
+ /* Exact name match. */
+ return true;
+ }
+ }
+ if (!strcmp(to_match, "default_core")) {
+ /*
+ * jevents and tests use default_core as a marker for any core
+ * PMU as the PMU name varies across architectures.
+ */
+ return true;
+ }
+ return false;
+ }
+ if (!pmu->is_uncore) {
/*
- * jevents and tests use default_core as a marker for any core
- * PMU as the PMU name varies across architectures.
+ * PMU isn't core or uncore: some kind of broken CPU-mask
+ * situation. Only match the exact name.
*/
- (pmu->is_core && !strcmp(pmu_name, "default_core"));
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (!strcmp(name, to_match)) {
+ /* Exact name match. */
+ return true;
+ }
+ }
+ return false;
+ }
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match))
+ return true;
+ if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * perf_pmu__name_wildcard_match - Called by the jevents generated code to see
+ * if pmu matches the json to_match string.
+ * @pmu: The pmu whose name/alias to match.
+ * @to_match: The possible match to pmu_name.
+ */
+bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match)
+{
+ return perf_pmu___name_match(pmu, to_match, /*wildcard=*/true);
+}
+
+/**
+ * perf_pmu__name_no_suffix_match - Does pmu's name match to_match ignoring any
+ * trailing suffix on the PMU name and/or to_match?
+ * @pmu: The pmu whose name/alias to match.
+ * @to_match: The possible match to pmu_name.
+ */
+bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match)
+{
+ return perf_pmu___name_match(pmu, to_match, /*wildcard=*/false);
}
bool perf_pmu__is_software(const struct perf_pmu *pmu)
@@ -2121,10 +2218,9 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu)
*/
int perf_pmu__caps_parse(struct perf_pmu *pmu)
{
- struct stat st;
char caps_path[PATH_MAX];
- DIR *caps_dir;
- struct dirent *evt_ent;
+ struct io_dir caps_dir;
+ struct io_dirent64 *evt_ent;
int caps_fd;
if (pmu->caps_initialized)
@@ -2135,24 +2231,21 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps"))
return -1;
- if (stat(caps_path, &st) < 0) {
+ caps_fd = open(caps_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
+ if (caps_fd == -1) {
pmu->caps_initialized = true;
return 0; /* no error if caps does not exist */
}
- caps_dir = opendir(caps_path);
- if (!caps_dir)
- return -EINVAL;
+ io_dir__init(&caps_dir, caps_fd);
- caps_fd = dirfd(caps_dir);
-
- while ((evt_ent = readdir(caps_dir)) != NULL) {
+ while ((evt_ent = io_dir__readdir(&caps_dir)) != NULL) {
char *name = evt_ent->d_name;
char value[128];
FILE *file;
int fd;
- if (!strcmp(name, ".") || !strcmp(name, ".."))
+ if (io_dir__is_dir(&caps_dir, evt_ent))
continue;
fd = openat(caps_fd, name, O_RDONLY);
@@ -2174,7 +2267,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
fclose(file);
}
- closedir(caps_dir);
+ close(caps_fd);
pmu->caps_initialized = true;
return pmu->nr_caps;
@@ -2229,29 +2322,31 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
name ?: "N/A", buf, config_name, config);
}
-bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok)
+bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match)
{
- const char *name = pmu->name;
- bool need_fnmatch = strisglob(tok);
+ const char *names[2] = {
+ pmu->name,
+ pmu->alias_name,
+ };
+ bool need_fnmatch = strisglob(wildcard_to_match);
- if (!strncmp(tok, "uncore_", 7))
- tok += 7;
- if (!strncmp(name, "uncore_", 7))
- name += 7;
+ if (!strncmp(wildcard_to_match, "uncore_", 7))
+ wildcard_to_match += 7;
- if (perf_pmu__match_ignoring_suffix(name, tok) ||
- (need_fnmatch && !fnmatch(tok, name, 0)))
- return true;
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *pmu_name = names[i];
- name = pmu->alias_name;
- if (!name)
- return false;
+ if (!pmu_name)
+ continue;
- if (!strncmp(name, "uncore_", 7))
- name += 7;
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
- return perf_pmu__match_ignoring_suffix(name, tok) ||
- (need_fnmatch && !fnmatch(tok, name, 0));
+ if (perf_pmu__match_wildcard(pmu_name, wildcard_to_match) ||
+ (need_fnmatch && !fnmatch(wildcard_to_match, pmu_name, 0)))
+ return true;
+ }
+ return false;
}
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size)
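
When the argument contains glob characters (strisglob()), the rewritten
perf_pmu__wildcard_match() additionally tries fnmatch(3). The second path
in isolation (the "imc" names are illustrative):

	#include <fnmatch.h>
	#include <stdio.h>

	int main(void)
	{
		printf("%d\n", fnmatch("imc*", "imc_free_running_0", 0) == 0); /* 1 */
		printf("%d\n", fnmatch("imc*", "cbox_0", 0) == 0);             /* 0 */
		return 0;
	}
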
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index dbed6c243a5e..b93014cc3670 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -37,6 +37,8 @@ struct perf_pmu_caps {
};
enum {
+ PERF_PMU_TYPE_PE_START = 0,
+ PERF_PMU_TYPE_PE_END = 0xFFFEFFFF,
PERF_PMU_TYPE_HWMON_START = 0xFFFF0000,
PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD,
PERF_PMU_TYPE_TOOL = 0xFFFFFFFE,
@@ -134,6 +136,11 @@ struct perf_pmu {
uint32_t cpu_json_aliases;
/** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */
uint32_t sys_json_aliases;
+ /**
+ * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when
+ * loading all sysfs events.
+ */
+ uint32_t cpu_common_json_aliases;
/** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */
bool sysfs_aliases_loaded;
/**
@@ -238,7 +245,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name);
size_t perf_pmu__num_events(struct perf_pmu *pmu);
int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
void *state, pmu_event_callback cb);
-bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
+bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match);
+bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match);
/**
* perf_pmu_is_software - is the PMU a software PMU as in it uses the
@@ -273,7 +281,7 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
const char *config_name);
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
-bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok);
+bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match);
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
int perf_pmu__pathname_scnprintf(char *buf, size_t size,
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index b493da0d22ef..b99292de7669 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -3,10 +3,10 @@
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <api/io_dir.h>
#include <subcmd/pager.h>
#include <sys/types.h>
#include <ctype.h>
-#include <dirent.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
@@ -37,10 +37,25 @@
*/
static LIST_HEAD(core_pmus);
static LIST_HEAD(other_pmus);
-static bool read_sysfs_core_pmus;
-static bool read_sysfs_all_pmus;
+enum perf_tool_pmu_type {
+ PERF_TOOL_PMU_TYPE_PE_CORE,
+ PERF_TOOL_PMU_TYPE_PE_OTHER,
+ PERF_TOOL_PMU_TYPE_TOOL,
+ PERF_TOOL_PMU_TYPE_HWMON,
+
+#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE)
+#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER)
+#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL)
+#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON)
+
+#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \
+ PERF_TOOL_PMU_TYPE_TOOL_MASK | \
+ PERF_TOOL_PMU_TYPE_HWMON_MASK)
+};
+static unsigned int read_pmu_types;
-static void pmu_read_sysfs(bool core_only);
+static void pmu_read_sysfs(unsigned int to_read_pmus);
size_t pmu_name_len_no_suffix(const char *str)
{
@@ -102,8 +117,7 @@ void perf_pmus__destroy(void)
perf_pmu__delete(pmu);
}
- read_sysfs_core_pmus = false;
- read_sysfs_all_pmus = false;
+ read_pmu_types = 0;
}
static struct perf_pmu *pmu_find(const char *name)
@@ -129,6 +143,7 @@ struct perf_pmu *perf_pmus__find(const char *name)
struct perf_pmu *pmu;
int dirfd;
bool core_pmu;
+ unsigned int to_read_pmus = 0;
/*
* Once PMU is loaded it stays in the list,
@@ -139,11 +154,11 @@ struct perf_pmu *perf_pmus__find(const char *name)
if (pmu)
return pmu;
- if (read_sysfs_all_pmus)
+ if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
return NULL;
core_pmu = is_pmu_core(name);
- if (core_pmu && read_sysfs_core_pmus)
+ if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
return NULL;
dirfd = perf_pmu__event_source_devices_fd();
@@ -151,15 +166,27 @@ struct perf_pmu *perf_pmus__find(const char *name)
/*eager_load=*/false);
close(dirfd);
- if (!pmu) {
- /*
- * Looking up an inidividual PMU failed. This may mean name is
- * an alias, so read the PMUs from sysfs and try to find again.
- */
- pmu_read_sysfs(core_pmu);
+ if (pmu)
+ return pmu;
+
+ /* Looking up an individual perf event PMU failed; check if a tool PMU should be read. */
+ if (!strncmp(name, "hwmon_", 6))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ else if (!strcmp(name, "tool"))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+ if (to_read_pmus) {
+ pmu_read_sysfs(to_read_pmus);
pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
}
- return pmu;
+ /* Read all necessary PMUs from sysfs and see if the PMU is found. */
+ to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK;
+ if (!core_pmu)
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ pmu_read_sysfs(to_read_pmus);
+ return pmu_find(name);
}
static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
@@ -176,11 +203,11 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
if (pmu)
return pmu;
- if (read_sysfs_all_pmus)
+ if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
return NULL;
core_pmu = is_pmu_core(name);
- if (core_pmu && read_sysfs_core_pmus)
+ if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
return NULL;
return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name,
@@ -197,52 +224,57 @@ static int pmus_cmp(void *priv __maybe_unused,
}
/* Add all pmus in sysfs to pmu list: */
-static void pmu_read_sysfs(bool core_only)
+static void pmu_read_sysfs(unsigned int to_read_types)
{
- int fd;
- DIR *dir;
- struct dirent *dent;
struct perf_pmu *tool_pmu;
- if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
+ if ((read_pmu_types & to_read_types) == to_read_types) {
+ /* All requested PMU types have been read. */
return;
+ }
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return;
+ if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
+ bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0;
- dir = fdopendir(fd);
- if (!dir) {
- close(fd);
- return;
- }
+ if (fd < 0)
+ goto skip_pe_pmus;
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- if (core_only && !is_pmu_core(dent->d_name))
- continue;
- /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
- perf_pmu__find2(fd, dent->d_name);
- }
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (core_only && !is_pmu_core(dent->d_name))
+ continue;
+ /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
+ perf_pmu__find2(fd, dent->d_name);
+ }
- closedir(dir);
- if (list_empty(&core_pmus)) {
+ close(fd);
+ }
+skip_pe_pmus:
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) {
if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
pr_err("Failure to set up any core PMUs\n");
}
list_sort(NULL, &core_pmus, pmus_cmp);
- if (!core_only) {
- tool_pmu = perf_pmus__tool_pmu();
- list_add_tail(&tool_pmu->list, &other_pmus);
- perf_pmus__read_hwmon_pmus(&other_pmus);
+
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) {
+ tool_pmu = tool_pmu__new();
+ if (tool_pmu)
+ list_add_tail(&tool_pmu->list, &other_pmus);
}
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0)
+ perf_pmus__read_hwmon_pmus(&other_pmus);
+
list_sort(NULL, &other_pmus, pmus_cmp);
- if (!list_empty(&core_pmus)) {
- read_sysfs_core_pmus = true;
- if (!core_only)
- read_sysfs_all_pmus = true;
- }
+
+ read_pmu_types |= to_read_types;
}
static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
@@ -263,12 +295,21 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
{
+ unsigned int to_read_pmus;
struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
- if (pmu || read_sysfs_all_pmus)
+ if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK))
return pmu;
- pmu_read_sysfs(/*core_only=*/false);
+ if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ } else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ } else {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK;
+ }
+ pmu_read_sysfs(to_read_pmus);
pmu = __perf_pmus__find_by_type(type);
return pmu;
}
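
The read_pmu_types bookkeeping is an ordinary "which categories have been
loaded" bitmask, checked before any sysfs scan. A sketch of the idiom with
invented category names:

	#include <stdio.h>

	enum { CAT_CORE, CAT_OTHER, CAT_TOOL, CAT_HWMON };
	#define MASK(c)		(1u << (c))
	#define ALL_MASK	(MASK(CAT_CORE) | MASK(CAT_OTHER) | \
				 MASK(CAT_TOOL) | MASK(CAT_HWMON))

	static unsigned int loaded;

	static void load(unsigned int want)
	{
		if ((loaded & want) == want)
			return;		/* everything requested already read */
		printf("loading mask %#x\n", want & ~loaded);
		loaded |= want;
	}

	int main(void)
	{
		load(MASK(CAT_CORE));			/* reads core PMUs */
		load(MASK(CAT_CORE) | MASK(CAT_TOOL));	/* only tool is new */
		load(ALL_MASK);				/* remaining categories */
		return loaded == ALL_MASK ? 0 : 1;
	}
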
@@ -282,7 +323,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
bool use_core_pmus = !pmu || pmu->is_core;
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/false);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
pmu = list_prepare_entry(pmu, &core_pmus, list);
}
if (use_core_pmus) {
@@ -300,7 +341,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
{
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/true);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK);
return list_first_entry_or_null(&core_pmus, typeof(*pmu), list);
}
list_for_each_entry_continue(pmu, &core_pmus, list)
@@ -316,7 +357,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/false);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
pmu = list_prepare_entry(pmu, &core_pmus, list);
} else
last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
@@ -674,47 +715,28 @@ bool perf_pmus__supports_extended_type(void)
return perf_pmus__do_support_extended_type;
}
-char *perf_pmus__default_pmu_name(void)
-{
- int fd;
- DIR *dir;
- struct dirent *dent;
- char *result = NULL;
-
- if (!list_empty(&core_pmus))
- return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name);
-
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return strdup("cpu");
-
- dir = fdopendir(fd);
- if (!dir) {
- close(fd);
- return strdup("cpu");
- }
-
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- if (is_pmu_core(dent->d_name)) {
- result = strdup(dent->d_name);
- break;
- }
- }
-
- closedir(dir);
- return result ?: strdup("cpu");
-}
-
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
{
struct perf_pmu *pmu = evsel->pmu;
+ bool legacy_core_type;
- if (!pmu) {
- pmu = perf_pmus__find_by_type(evsel->core.attr.type);
- ((struct evsel *)evsel)->pmu = pmu;
+ if (pmu)
+ return pmu;
+
+ pmu = perf_pmus__find_by_type(evsel->core.attr.type);
+ legacy_core_type =
+ evsel->core.attr.type == PERF_TYPE_HARDWARE ||
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE;
+ if (!pmu && legacy_core_type) {
+ if (perf_pmus__supports_extended_type()) {
+ u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+
+ pmu = perf_pmus__find_by_type(type);
+ } else {
+ pmu = perf_pmus__find_core_pmu();
+ }
}
+ ((struct evsel *)evsel)->pmu = pmu;
return pmu;
}
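
Legacy PERF_TYPE_HARDWARE/PERF_TYPE_HW_CACHE events carry the owning PMU in
the upper 32 bits of attr.config; evsel__find_pmu() now decodes that before
falling back to the first core PMU. A sketch of the decode, assuming
PERF_PMU_TYPE_SHIFT is 32 as in the perf_event UAPI headers:

	#include <inttypes.h>
	#include <stdio.h>

	#define PERF_PMU_TYPE_SHIFT	32
	#define PERF_HW_EVENT_MASK	0xffffffffULL

	int main(void)
	{
		/* A hypothetical cycles event owned by the PMU with type 8. */
		uint64_t config = (8ULL << PERF_PMU_TYPE_SHIFT) | 0x0;
		uint32_t pmu_type = config >> PERF_PMU_TYPE_SHIFT;

		printf("pmu type %" PRIu32 ", event %#" PRIx64 "\n",
		       pmu_type, config & PERF_HW_EVENT_MASK);
		return 0;
	}
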
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index a0cb0eb2ff97..8def20e615ad 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -27,7 +27,6 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi
bool perf_pmus__have_event(const char *pname, const char *name);
int perf_pmus__num_core_pmus(void);
bool perf_pmus__supports_extended_type(void);
-char *perf_pmus__default_pmu_name(void);
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name);
struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1e769b68da37..3cc7c40f5097 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -973,6 +973,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
(unsigned long)dwarf_dieoffset(sp_die));
pf->fname = fname;
+ pf->abstract_dieoffset = dwarf_dieoffset(sp_die);
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
pf->lno += pp->line;
@@ -1179,6 +1180,8 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
struct local_vars_finder *vf = data;
struct probe_finder *pf = vf->pf;
int tag;
+ Dwarf_Attribute attr;
+ Dwarf_Die var_die;
tag = dwarf_tag(die_mem);
if (tag == DW_TAG_formal_parameter ||
@@ -1196,10 +1199,22 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
}
}
- if (dwarf_haspc(die_mem, vf->pf->addr))
+ if (dwarf_haspc(die_mem, vf->pf->addr)) {
+ /*
+ * When DW_AT_entry_pc contains the instruction address,
+ * also check that the DW_AT_abstract_origin of die_mem
+ * points to the correct DIE.
+ */
+ if (dwarf_attr(die_mem, DW_AT_abstract_origin, &attr)) {
+ dwarf_formref_die(&attr, &var_die);
+ if (pf->abstract_dieoffset != dwarf_dieoffset(&var_die))
+ goto out;
+ }
return DIE_FIND_CB_CONTINUE;
- else
- return DIE_FIND_CB_SIBLING;
+ }
+
+out:
+ return DIE_FIND_CB_SIBLING;
}
static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
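
The new check resolves a variable DIE's DW_AT_abstract_origin and compares
DIE offsets using the elfutils libdw calls seen above. Reduced to a helper,
the logic is roughly this (a compile-only sketch; it needs a CU to walk, so
no main is given):

	#include <dwarf.h>
	#include <elfutils/libdw.h>
	#include <stdbool.h>

	/*
	 * Does die_mem's DW_AT_abstract_origin (if present) point back to
	 * the DIE at expected_off?
	 */
	static bool origin_matches(Dwarf_Die *die_mem, Dwarf_Off expected_off)
	{
		Dwarf_Attribute attr;
		Dwarf_Die origin;

		if (!dwarf_attr(die_mem, DW_AT_abstract_origin, &attr))
			return true;	/* no origin attribute: nothing to reject */
		if (!dwarf_formref_die(&attr, &origin))
			return false;
		return dwarf_dieoffset(&origin) == expected_off;
	}
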
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index dcf6cc1e1cbe..ecd6d937c592 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -63,6 +63,7 @@ struct probe_finder {
const char *fname; /* Real file name */
Dwarf_Die cu_die; /* Current CU */
Dwarf_Die sp_die;
+ Dwarf_Off abstract_dieoffset;
struct intlist *lcache; /* Line cache for lazy match */
/* For variable searching */
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index a1d1e4ef6257..141ffa129c69 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -63,20 +63,6 @@ void pstack__push(struct pstack *pstack, void *key)
pstack->entries[pstack->top++] = key;
}
-void *pstack__pop(struct pstack *pstack)
-{
- void *ret;
-
- if (pstack->top == 0) {
- pr_err("%s: underflow!\n", __func__);
- return NULL;
- }
-
- ret = pstack->entries[--pstack->top];
- pstack->entries[pstack->top] = NULL;
- return ret;
-}
-
void *pstack__peek(struct pstack *pstack)
{
if (pstack->top == 0)
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index 8729b8be061d..712051b8130f 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -10,7 +10,6 @@ void pstack__delete(struct pstack *pstack);
bool pstack__empty(const struct pstack *pstack);
void pstack__remove(struct pstack *pstack, void *key);
void pstack__push(struct pstack *pstack, void *key);
-void *pstack__pop(struct pstack *pstack);
void *pstack__peek(struct pstack *pstack);
#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index b4bc57859f73..f3c05da25b4a 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -9,10 +9,12 @@
#include <event-parse.h>
#endif
#include <perf/mmap.h>
+#include "callchain.h"
#include "evlist.h"
#include "evsel.h"
#include "event.h"
#include "print_binary.h"
+#include "record.h"
#include "strbuf.h"
#include "thread_map.h"
#include "trace-event.h"
@@ -20,13 +22,6 @@
#include "util/sample.h"
#include <internal/lib.h>
-#define _PyUnicode_FromString(arg) \
- PyUnicode_FromString(arg)
-#define _PyUnicode_FromFormat(...) \
- PyUnicode_FromFormat(__VA_ARGS__)
-#define _PyLong_FromLong(arg) \
- PyLong_FromLong(arg)
-
PyMODINIT_FUNC PyInit_perf(void);
#define member_def(type, member, ptype, help) \
@@ -47,7 +42,7 @@ struct pyrf_event {
};
#define sample_members \
- sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
+ sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \
sample_member_def(sample_pid, pid, T_INT, "event pid"), \
sample_member_def(sample_tid, tid, T_INT, "event tid"), \
sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
@@ -270,6 +265,12 @@ static PyMemberDef pyrf_sample_event__members[] = {
{ .name = NULL, },
};
+static void pyrf_sample_event__delete(struct pyrf_event *pevent)
+{
+ perf_sample__exit(&pevent->sample);
+ Py_TYPE(pevent)->tp_free((PyObject*)pevent);
+}
+
static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent)
{
PyObject *ret;
@@ -336,23 +337,14 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
{
const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
struct evsel *evsel = pevent->evsel;
+ struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *field;
- if (!evsel->tp_format) {
- struct tep_event *tp_format;
-
- tp_format = trace_event__tp_format_id(evsel->core.attr.config);
- if (IS_ERR_OR_NULL(tp_format))
- return NULL;
-
- evsel->tp_format = tp_format;
- }
-
- field = tep_find_any_field(evsel->tp_format, str);
- if (!field)
+ if (IS_ERR_OR_NULL(tp_format))
return NULL;
- return tracepoint_field(pevent, field);
+ field = tep_find_any_field(tp_format, str);
+ return field ? tracepoint_field(pevent, field) : NULL;
}
#endif /* HAVE_LIBTRACEEVENT */
@@ -428,6 +420,9 @@ static int pyrf_event__setup_types(void)
pyrf_sample_event__type.tp_new =
pyrf_context_switch_event__type.tp_new =
pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+
+ pyrf_sample_event__type.tp_dealloc = (destructor)pyrf_sample_event__delete;
+
err = PyType_Ready(&pyrf_mmap_event__type);
if (err < 0)
goto out;
@@ -481,6 +476,11 @@ static PyObject *pyrf_event__new(const union perf_event *event)
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
return NULL;
+ // FIXME: this had better be dynamic, or we need to parse everything
+ // before calling perf_mmap__consume(), including tracepoint fields.
+ if (sizeof(pevent->event) < event->header.size)
+ return NULL;
+
ptype = pyrf_event__type[event->header.type];
pevent = PyObject_New(struct pyrf_event, ptype);
if (pevent != NULL)
@@ -802,6 +802,28 @@ static PyMethodDef pyrf_evsel__methods[] = {
{ .ml_name = NULL, }
};
+#define evsel_member_def(member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_evsel, evsel.member), \
+ 0, help }
+
+#define evsel_attr_member_def(member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_evsel, evsel.core.attr.member), \
+ 0, help }
+
+static PyMemberDef pyrf_evsel__members[] = {
+ evsel_member_def(tracking, T_BOOL, "tracking event."),
+ evsel_attr_member_def(type, T_UINT, "attribute type."),
+ evsel_attr_member_def(size, T_UINT, "attribute size."),
+ evsel_attr_member_def(config, T_ULONGLONG, "attribute config."),
+ evsel_attr_member_def(sample_period, T_ULONGLONG, "attribute sample_period."),
+ evsel_attr_member_def(sample_type, T_ULONGLONG, "attribute sample_type."),
+ evsel_attr_member_def(read_format, T_ULONGLONG, "attribute read_format."),
+ evsel_attr_member_def(wakeup_events, T_UINT, "attribute wakeup_events."),
+ { .name = NULL, },
+};
+
static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
static PyTypeObject pyrf_evsel__type = {
@@ -811,6 +833,7 @@ static PyTypeObject pyrf_evsel__type = {
.tp_dealloc = (destructor)pyrf_evsel__delete,
.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
.tp_doc = pyrf_evsel__doc,
+ .tp_members = pyrf_evsel__members,
.tp_methods = pyrf_evsel__methods,
.tp_init = (initproc)pyrf_evsel__init,
.tp_str = pyrf_evsel__str,
@@ -851,6 +874,16 @@ static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
}
+static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist)
+{
+ struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type);
+
+ if (pcpu_map)
+ pcpu_map->cpus = perf_cpu_map__get(pevlist->evlist.core.all_cpus);
+
+ return (PyObject *)pcpu_map;
+}
+
static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
PyObject *args, PyObject *kwargs)
{
@@ -984,20 +1017,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
evsel = evlist__event2evsel(evlist, event);
if (!evsel) {
+ Py_DECREF(pyevent);
Py_INCREF(Py_None);
return Py_None;
}
pevent->evsel = evsel;
- err = evsel__parse_sample(evsel, event, &pevent->sample);
-
- /* Consume the even only after we parsed it out. */
perf_mmap__consume(&md->core);
- if (err)
+ err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample);
+ if (err) {
+ Py_DECREF(pyevent);
return PyErr_Format(PyExc_OSError,
"perf: can't parse sample, err=%d", err);
+ }
+
return pyevent;
}
end:
@@ -1019,8 +1054,53 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
return Py_None;
}
+static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist)
+{
+ struct record_opts opts = {
+ .sample_time = true,
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 4000,
+ .target = {
+ .uses_mmap = true,
+ .default_per_cpu = true,
+ },
+ .nr_threads_synthesize = 1,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1,
+ .no_buffering = true,
+ .no_inherit = true,
+ };
+ struct evlist *evlist = &pevlist->evlist;
+
+ evlist__config(evlist, &opts, &callchain_param);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__disable(struct pyrf_evlist *pevlist)
+{
+ evlist__disable(&pevlist->evlist);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__enable(struct pyrf_evlist *pevlist)
+{
+ evlist__enable(&pevlist->evlist);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
static PyMethodDef pyrf_evlist__methods[] = {
{
+ .ml_name = "all_cpus",
+ .ml_meth = (PyCFunction)pyrf_evlist__all_cpus,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.")
+ },
+ {
.ml_name = "mmap",
.ml_meth = (PyCFunction)pyrf_evlist__mmap,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
@@ -1056,6 +1136,24 @@ static PyMethodDef pyrf_evlist__methods[] = {
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = PyDoc_STR("reads an event.")
},
+ {
+ .ml_name = "config",
+ .ml_meth = (PyCFunction)pyrf_evlist__config,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Apply default record options to the evlist.")
+ },
+ {
+ .ml_name = "disable",
+ .ml_meth = (PyCFunction)pyrf_evlist__disable,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Disable the evsels in the evlist.")
+ },
+ {
+ .ml_name = "enable",
+ .ml_meth = (PyCFunction)pyrf_evlist__enable,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Enable the evsels in the evlist.")
+ },
{ .ml_name = NULL, }
};
@@ -1254,6 +1352,8 @@ static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel)
evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx);
evsel__clone(&pevsel->evsel, evsel);
+ if (evsel__is_group_leader(evsel))
+ evsel__set_leader(&pevsel->evsel, &pevsel->evsel);
return (PyObject *)pevsel;
}
@@ -1281,12 +1381,18 @@ static PyObject *pyrf__parse_events(PyObject *self, PyObject *args)
struct evlist evlist = {};
struct parse_events_error err;
PyObject *result;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
- if (!PyArg_ParseTuple(args, "s", &input))
+ if (!PyArg_ParseTuple(args, "s|OO", &input, &pcpus, &pthreads))
return NULL;
+ threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL;
+ cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL;
+
parse_events_error__init(&err);
- evlist__init(&evlist, NULL, NULL);
+ evlist__init(&evlist, cpus, threads);
if (parse_events(&evlist, input, &err)) {
parse_events_error__print(&err, input);
PyErr_SetFromErrno(PyExc_OSError);
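
The pyrf_evsel__members table uses the stock CPython recipe for exposing C
struct fields: one offsetof() per attribute. A minimal loadable module
doing the same, with invented names (demo.event is not a perf type):

	#include <Python.h>
	#include <structmember.h>

	struct demo_event {
		PyObject_HEAD
		struct {
			unsigned int type;
			unsigned long long config;
		} attr;
	};

	/* Expose C struct fields as Python attributes. */
	static PyMemberDef demo_event__members[] = {
		{ "type",   T_UINT,      offsetof(struct demo_event, attr.type),   0, "attribute type." },
		{ "config", T_ULONGLONG, offsetof(struct demo_event, attr.config), 0, "attribute config." },
		{ .name = NULL, },
	};

	static PyTypeObject demo_event__type = {
		PyVarObject_HEAD_INIT(NULL, 0)
		.tp_name	= "demo.event",
		.tp_basicsize	= sizeof(struct demo_event),
		.tp_flags	= Py_TPFLAGS_DEFAULT,
		.tp_members	= demo_event__members,
		.tp_new		= PyType_GenericNew,
	};

	static struct PyModuleDef demo_module = {
		PyModuleDef_HEAD_INIT, .m_name = "demo", .m_size = -1,
	};

	PyMODINIT_FUNC PyInit_demo(void)
	{
		PyObject *m;

		if (PyType_Ready(&demo_event__type) < 0)
			return NULL;
		m = PyModule_Create(&demo_module);
		if (m) {
			Py_INCREF(&demo_event__type);
			PyModule_AddObject(m, "event", (PyObject *)&demo_event__type);
		}
		return m;
	}
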
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
deleted file mode 100644
index d927a0d25052..000000000000
--- a/tools/perf/util/rb_resort.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_RESORT_RB_H_
-#define _PERF_RESORT_RB_H_
-/*
- * Template for creating a class to resort an existing rb_tree according to
- * a new sort criteria, that must be present in the entries of the source
- * rb_tree.
- *
- * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Quick example, resorting threads by its shortname:
- *
- * First define the prefix (threads) to be used for the functions and data
- * structures created, and provide an expression for the sorting, then the
- * fields to be present in each of the entries in the new, sorted, rb_tree.
- *
- * The body of the init function should collect the fields, maybe
- * pre-calculating them from multiple entries in the original 'entry' from
- * the rb_tree used as a source for the entries to be sorted:
-
-DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
- b->thread->shortname) < 0,
- struct thread *thread;
-)
-{
- entry->thread = rb_entry(nd, struct thread, rb_node);
-}
-
- * After this it is just a matter of instantiating it and iterating it,
- * for a few data structures with existing rb_trees, such as 'struct machine',
- * helpers are available to get the rb_root and the nr_entries:
-
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
-
- * This will instantiate the new rb_tree and a cursor for it, that can be used as:
-
- struct rb_node *nd;
-
- resort_rb__for_each_entry(nd, threads) {
- struct thread *t = threads_entry;
- printf("%s: %d\n", t->shortname, t->tid);
- }
-
- * Then delete it:
-
- resort_rb__delete(threads);
-
- * The name of the data structures and functions will have a _sorted suffix
- * right before the method names, i.e. will look like:
- *
- * struct threads_sorted_entry {}
- * threads_sorted__insert()
- */
-
-#define DEFINE_RESORT_RB(__name, __comp, ...) \
-struct __name##_sorted_entry { \
- struct rb_node rb_node; \
- __VA_ARGS__ \
-}; \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry); \
- \
-static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
-{ \
- struct __name##_sorted_entry *a, *b; \
- a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
- b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
- return __comp; \
-} \
- \
-struct __name##_sorted { \
- struct rb_root entries; \
- struct __name##_sorted_entry nd[0]; \
-}; \
- \
-static void __name##_sorted__insert(struct __name##_sorted *sorted, \
- struct rb_node *sorted_nd) \
-{ \
- struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
- while (*p != NULL) { \
- parent = *p; \
- if (__name##_sorted__cmp(sorted_nd, parent)) \
- p = &(*p)->rb_left; \
- else \
- p = &(*p)->rb_right; \
- } \
- rb_link_node(sorted_nd, parent, p); \
- rb_insert_color(sorted_nd, &sorted->entries); \
-} \
- \
-static void __name##_sorted__sort(struct __name##_sorted *sorted, \
- struct rb_root *entries) \
-{ \
- struct rb_node *nd; \
- unsigned int i = 0; \
- for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
- struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
- __name##_sorted__init_entry(nd, snd); \
- __name##_sorted__insert(sorted, &snd->rb_node); \
- } \
-} \
- \
-static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
- int nr_entries) \
-{ \
- struct __name##_sorted *sorted; \
- sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
- if (sorted) { \
- sorted->entries = RB_ROOT; \
- __name##_sorted__sort(sorted, entries); \
- } \
- return sorted; \
-} \
- \
-static void __name##_sorted__delete(struct __name##_sorted *sorted) \
-{ \
- free(sorted); \
-} \
- \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry)
-
-#define DECLARE_RESORT_RB(__name) \
-struct __name##_sorted_entry *__name##_entry; \
-struct __name##_sorted *__name = __name##_sorted__new
-
-#define resort_rb__for_each_entry(__nd, __name) \
- for (__nd = rb_first(&__name->entries); \
- __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
- rb_node), __nd; \
- __nd = rb_next(__nd))
-
-#define resort_rb__delete(__name) \
- __name##_sorted__delete(__name), __name = NULL
-
-/*
- * Helpers for other classes that contains both an rbtree and the
- * number of entries in it:
- */
-
-/* For 'struct intlist' */
-#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
- DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
- __ilist->rblist.nr_entries)
-
-#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 30638653ad2d..0ce52f0280b8 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -513,6 +513,7 @@ static bool s390_cpumsf_make_event(size_t pos,
.period = 1
};
union perf_event event;
+ int ret;
memset(&event, 0, sizeof(event));
if (basic->CL == 1) /* Native LPAR mode */
@@ -536,8 +537,9 @@ static bool s390_cpumsf_make_event(size_t pos,
pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
__func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
sample.tid, sample.cpumode, sample.cpu);
- if (perf_session__deliver_synth_event(sfq->sf->session, &event,
- &sample)) {
+ ret = perf_session__deliver_synth_event(sfq->sf->session, &event, &sample);
+ perf_sample__exit(&sample);
+ if (ret) {
pr_err("s390 Auxiliary Trace: failed to deliver event\n");
return false;
}
diff --git a/tools/perf/util/sample.c b/tools/perf/util/sample.c
new file mode 100644
index 000000000000..605fee971f55
--- /dev/null
+++ b/tools/perf/util/sample.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "sample.h"
+#include "debug.h"
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+void perf_sample__init(struct perf_sample *sample, bool all)
+{
+ if (all) {
+ memset(sample, 0, sizeof(*sample));
+ } else {
+ sample->user_regs = NULL;
+ sample->intr_regs = NULL;
+ }
+}
+
+void perf_sample__exit(struct perf_sample *sample)
+{
+ free(sample->user_regs);
+ free(sample->intr_regs);
+}
+
+struct regs_dump *perf_sample__user_regs(struct perf_sample *sample)
+{
+ if (!sample->user_regs) {
+ sample->user_regs = zalloc(sizeof(*sample->user_regs));
+ if (!sample->user_regs)
+ pr_err("Failure to allocate sample user_regs");
+ }
+ return sample->user_regs;
+}
+
+
+{
+ if (!sample->intr_regs) {
+ sample->intr_regs = zalloc(sizeof(*sample->intr_regs));
+ if (!sample->intr_regs)
+ pr_err("Failure to allocate sample intr_regs");
+ }
+ return sample->intr_regs;
+}
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 70b2c3135555..0e96240052e9 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -67,7 +67,7 @@ struct aux_sample {
};
struct simd_flags {
- u64 arch:1, /* architecture (isa) */
+ u8 arch:1, /* architecture (isa) */
pred:2; /* predication */
};
@@ -114,14 +114,19 @@ struct perf_sample {
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
u64 *branch_stack_cntr;
- struct regs_dump user_regs;
- struct regs_dump intr_regs;
+ struct regs_dump *user_regs;
+ struct regs_dump *intr_regs;
struct stack_dump user_stack;
struct sample_read read;
struct aux_sample aux_sample;
struct simd_flags simd_flags;
};
+void perf_sample__init(struct perf_sample *sample, bool all);
+void perf_sample__exit(struct perf_sample *sample);
+struct regs_dump *perf_sample__user_regs(struct perf_sample *sample);
+struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample);
+
/*
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
* 8-byte alignment.
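
Moving the regs_dump members behind pointers gives perf_sample an
init/exit lifecycle with lazily allocated register dumps. A self-contained
mock of the pattern (mini_sample and its helpers are stand-ins, not the
perf types):

	#include <stdio.h>
	#include <stdlib.h>

	struct regs { unsigned long long abi; };

	struct mini_sample {
		struct regs *user_regs;		/* NULL until first requested */
		struct regs *intr_regs;
	};

	static void mini_sample__init(struct mini_sample *s)
	{
		s->user_regs = NULL;
		s->intr_regs = NULL;
	}

	/* Allocate on first use, as perf_sample__user_regs() does. */
	static struct regs *mini_sample__user_regs(struct mini_sample *s)
	{
		if (!s->user_regs)
			s->user_regs = calloc(1, sizeof(*s->user_regs));
		return s->user_regs;
	}

	static void mini_sample__exit(struct mini_sample *s)
	{
		free(s->user_regs);
		free(s->intr_regs);
	}

	int main(void)
	{
		struct mini_sample s;
		struct regs *r;

		mini_sample__init(&s);
		r = mini_sample__user_regs(&s);
		if (r)
			r->abi = 2;
		printf("abi=%llu\n", s.user_regs ? s.user_regs->abi : 0);
		mini_sample__exit(&s);
		return 0;
	}
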
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index b1b5e94537e4..520729e78965 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -745,19 +745,30 @@ static int set_regs_in_dict(PyObject *dict,
const char *arch = perf_env__arch(evsel__env(evsel));
int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1;
- char *bf = malloc(size);
- if (!bf)
- return -1;
+ char *bf = NULL;
- regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
+ if (sample->intr_regs) {
+ bf = malloc(size);
+ if (!bf)
+ return -1;
- pydict_set_item_string_decref(dict, "iregs",
- _PyUnicode_FromString(bf));
+ regs_map(sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
- regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size);
+ pydict_set_item_string_decref(dict, "iregs",
+ _PyUnicode_FromString(bf));
+ }
- pydict_set_item_string_decref(dict, "uregs",
- _PyUnicode_FromString(bf));
+ if (sample->user_regs) {
+ if (!bf) {
+ bf = malloc(size);
+ if (!bf)
+ return -1;
+ }
+ regs_map(sample->user_regs, attr->sample_regs_user, arch, bf, size);
+
+ pydict_set_item_string_decref(dict, "uregs",
+ _PyUnicode_FromString(bf));
+ }
free(bf);
return 0;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c06e3020a976..60fb9997ea0d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -950,7 +950,12 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a
static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
- struct regs_dump *user_regs = &sample->user_regs;
+ struct regs_dump *user_regs;
+
+ if (!sample->user_regs)
+ return;
+
+ user_regs = perf_sample__user_regs(sample);
if (user_regs->regs)
regs__printf("user", user_regs, arch);
@@ -958,7 +963,12 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch)
static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
- struct regs_dump *intr_regs = &sample->intr_regs;
+ struct regs_dump *intr_regs;
+
+ if (!sample->intr_regs)
+ return;
+
+ intr_regs = perf_sample__intr_regs(sample);
if (intr_regs->regs)
regs__printf("intr", intr_regs, arch);
@@ -1351,25 +1361,30 @@ static int perf_session__deliver_event(struct perf_session *session,
const char *file_path)
{
struct perf_sample sample;
- int ret = evlist__parse_sample(session->evlist, event, &sample);
+ int ret;
+ perf_sample__init(&sample, /*all=*/false);
+ ret = evlist__parse_sample(session->evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
+ goto out;
}
ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
- return ret;
- if (ret > 0)
- return 0;
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
ret = machines__deliver_event(&session->machines, session->evlist,
event, &sample, tool, file_offset, file_path);
if (dump_trace && sample.aux_sample.size)
auxtrace__dump_auxtrace_sample(session, &sample);
-
+out:
+ perf_sample__exit(&sample);
return ret;
}
@@ -1380,10 +1395,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
{
struct ordered_events *oe = &session->ordered_events;
const struct perf_tool *tool = session->tool;
- struct perf_sample sample = { .time = 0, };
+ struct perf_sample sample;
int fd = perf_data__fd(session->data);
int err;
+ perf_sample__init(&sample, /*all=*/true);
if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool))
dump_event(session->evlist, event, file_offset, &sample, file_path);
@@ -1395,15 +1411,17 @@ static s64 perf_session__process_user_event(struct perf_session *session,
perf_session__set_id_hdr_size(session);
perf_session__set_comm_exec(session);
}
- return err;
+ break;
case PERF_RECORD_EVENT_UPDATE:
- return tool->event_update(tool, event, &session->evlist);
+ err = tool->event_update(tool, event, &session->evlist);
+ break;
case PERF_RECORD_HEADER_EVENT_TYPE:
/*
* Deprecated, but we need to handle it for the sake
* of old data files created in pipe mode.
*/
- return 0;
+ err = 0;
+ break;
case PERF_RECORD_HEADER_TRACING_DATA:
/*
* Setup for reading amidst mmap, but only when we
@@ -1412,15 +1430,20 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset, SEEK_SET);
- return tool->tracing_data(session, event);
+ err = tool->tracing_data(session, event);
+ break;
case PERF_RECORD_HEADER_BUILD_ID:
- return tool->build_id(session, event);
+ err = tool->build_id(session, event);
+ break;
case PERF_RECORD_FINISHED_ROUND:
- return tool->finished_round(tool, event, oe);
+ err = tool->finished_round(tool, event, oe);
+ break;
case PERF_RECORD_ID_INDEX:
- return tool->id_index(session, event);
+ err = tool->id_index(session, event);
+ break;
case PERF_RECORD_AUXTRACE_INFO:
- return tool->auxtrace_info(session, event);
+ err = tool->auxtrace_info(session, event);
+ break;
case PERF_RECORD_AUXTRACE:
/*
* Setup for reading amidst mmap, but only when we
@@ -1429,35 +1452,48 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset + event->header.size, SEEK_SET);
- return tool->auxtrace(session, event);
+ err = tool->auxtrace(session, event);
+ break;
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
- return tool->auxtrace_error(session, event);
+ err = tool->auxtrace_error(session, event);
+ break;
case PERF_RECORD_THREAD_MAP:
- return tool->thread_map(session, event);
+ err = tool->thread_map(session, event);
+ break;
case PERF_RECORD_CPU_MAP:
- return tool->cpu_map(session, event);
+ err = tool->cpu_map(session, event);
+ break;
case PERF_RECORD_STAT_CONFIG:
- return tool->stat_config(session, event);
+ err = tool->stat_config(session, event);
+ break;
case PERF_RECORD_STAT:
- return tool->stat(session, event);
+ err = tool->stat(session, event);
+ break;
case PERF_RECORD_STAT_ROUND:
- return tool->stat_round(session, event);
+ err = tool->stat_round(session, event);
+ break;
case PERF_RECORD_TIME_CONV:
session->time_conv = event->time_conv;
- return tool->time_conv(session, event);
+ err = tool->time_conv(session, event);
+ break;
case PERF_RECORD_HEADER_FEATURE:
- return tool->feature(session, event);
+ err = tool->feature(session, event);
+ break;
case PERF_RECORD_COMPRESSED:
err = tool->compressed(session, event, file_offset, file_path);
if (err)
dump_event(session->evlist, event, file_offset, &sample, file_path);
- return err;
+ break;
case PERF_RECORD_FINISHED_INIT:
- return tool->finished_init(session, event);
+ err = tool->finished_init(session, event);
+ break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ break;
}
+ perf_sample__exit(&sample);
+ return err;
}
int perf_session__deliver_synth_event(struct perf_session *session,
@@ -2403,6 +2439,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
return false;
}
+bool perf_session__has_switch_events(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.context_switch)
+ return true;
+ }
+
+ return false;
+}
+
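A usage sketch for the new helper; a command that depends on context switch records (the caller here is hypothetical) could bail out early:

	if (!perf_session__has_switch_events(session)) {
		pr_err("Data file does not contain context switch events\n");
		return -1;
	}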
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
char *bracket;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index bcf1bcf06959..db1c120a9e67 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -141,6 +141,7 @@ int perf_session__resolve_callchain(struct perf_session *session,
struct symbol **parent);
bool perf_session__has_traces(struct perf_session *session, const char *msg);
+bool perf_session__has_switch_events(struct perf_session *session);
void perf_event__attr_swap(struct perf_event_attr *attr);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 649550e9b7aa..dd289d15acfd 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -3,6 +3,7 @@ from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
+assert cc, "Environment variable CC not set"
# Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..."
cc_tokens = cc.split()
@@ -12,8 +13,13 @@ if len(cc_tokens) > 1:
else:
cc_options = ""
+# stderr is set to PIPE above, so it cannot be None; tell mypy to skip the Optional-access check.
+# mypy: disable-error-code="union-attr"
cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
-src_feature_tests = getenv('srctree') + '/tools/build/feature'
+
+srctree = getenv('srctree')
+assert srctree, "Environment variable srctree, for the Linux sources, not set"
+src_feature_tests = f'{srctree}/tools/build/feature'
def clang_has_option(option):
cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
@@ -71,7 +77,7 @@ else:
# The python headers have mixed code with declarations (decls after asserts, for instance)
cflags += [ "-Wno-declaration-after-statement" ]
-src_perf = getenv('srctree') + '/tools/perf'
+src_perf = f'{srctree}/tools/perf'
build_lib = getenv('PYTHON_EXTBUILD_LIB')
build_tmp = getenv('PYTHON_EXTBUILD_TMP')
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 3dd33721823f..c51049087e4e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -892,6 +892,38 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
+/* --sort parallelism */
+
+static int64_t
+sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->parallelism - left->parallelism;
+}
+
+static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const unsigned long *parallelism_filter = arg;
+
+ if (type != HIST_FILTER__PARALLELISM)
+ return -1;
+
+ return test_bit(he->parallelism, parallelism_filter);
+}
+
+static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*d", width, he->parallelism);
+}
+
+struct sort_entry sort_parallelism = {
+ .se_header = "Parallelism",
+ .se_cmp = sort__parallelism_cmp,
+ .se_filter = hist_entry__parallelism_filter,
+ .se_snprintf = hist_entry__parallelism_snprintf,
+ .se_width_idx = HISTC_PARALLELISM,
+};
+
/* --sort cgroup_id */
static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
@@ -2371,44 +2403,19 @@ sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right)
return left->mem_type_off - right->mem_type_off;
}
-static void fill_member_name(char *buf, size_t sz, struct annotated_member *m,
- int offset, bool first)
-{
- struct annotated_member *child;
-
- if (list_empty(&m->children))
- return;
-
- list_for_each_entry(child, &m->children, node) {
- if (child->offset <= offset && offset < child->offset + child->size) {
- int len = 0;
-
- /* It can have anonymous struct/union members */
- if (child->var_name) {
- len = scnprintf(buf, sz, "%s%s",
- first ? "" : ".", child->var_name);
- first = false;
- }
-
- fill_member_name(buf + len, sz - len, child, offset, first);
- return;
- }
- }
-}
-
static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
char buf[4096];
- buf[0] = '\0';
- if (list_empty(&he_type->self.children))
- snprintf(buf, sizeof(buf), "no field");
- else
- fill_member_name(buf, sizeof(buf), &he_type->self,
- he->mem_type_off, true);
- buf[4095] = '\0';
+ if (he_type == &unknown_type || he_type == &stackop_type ||
+ he_type == &canary_type)
+ return repsep_snprintf(bf, size, "%s", he_type->self.type_name);
+
+ if (!annotated_data_type__get_member_name(he_type, buf, sizeof(buf),
+ he->mem_type_off))
+ scnprintf(buf, sizeof(buf), "no field");
return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name,
he->mem_type_off, buf);
@@ -2534,6 +2541,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
+ DIM(SORT_PARALLELISM, "parallelism", sort_parallelism),
};
#undef DIM
@@ -2589,17 +2597,20 @@ struct hpp_dimension {
const char *name;
struct perf_hpp_fmt *fmt;
int taken;
+ int was_taken;
};
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD, "overhead"),
+ DIM(PERF_HPP__LATENCY, "latency"),
DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+ DIM(PERF_HPP__LATENCY_ACC, "latency_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
DIM(PERF_HPP__WEIGHT1, "weight1"),
@@ -2735,6 +2746,7 @@ MK_SORT_ENTRY_CHK(thread)
MK_SORT_ENTRY_CHK(comm)
MK_SORT_ENTRY_CHK(dso)
MK_SORT_ENTRY_CHK(sym)
+MK_SORT_ENTRY_CHK(parallelism)
static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
@@ -3477,6 +3489,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
return -1;
hd->taken = 1;
+ hd->was_taken = 1;
perf_hpp_list__register_sort_field(list, fmt);
return 0;
}
@@ -3511,10 +3524,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
return 0;
}
-int hpp_dimension__add_output(unsigned col)
+int hpp_dimension__add_output(unsigned col, bool implicit)
{
+ struct hpp_dimension *hd;
+
BUG_ON(col >= PERF_HPP__MAX_INDEX);
- return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+ hd = &hpp_sort_dimensions[col];
+ if (implicit && !hd->was_taken)
+ return 0;
+ return __hpp_dimension__add_output(&perf_hpp_list, hd);
}
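With was_taken recorded, an implicit request adds the output column only when the user already picked that dimension as a sort key. An illustrative call (the call site is an assumption, not part of this patch):

	/* Emit the latency column only if "latency" was taken as a sort key. */
	hpp_dimension__add_output(PERF_HPP__LATENCY, /*implicit=*/true);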
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
@@ -3639,6 +3657,34 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
return -ESRCH;
}
+/* This should match with sort_dimension__add() above */
+static bool is_hpp_sort_key(const char *key)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) {
+ if (!strcmp(arch_specific_sort_keys[i], key) &&
+ !arch_support_sort_key(key)) {
+ return false;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
+
+ if (sd->name && !strncasecmp(key, sd->name, strlen(key)))
+ return false;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (!strncasecmp(key, hd->name, strlen(key)))
+ return true;
+ }
+ return false;
+}
+
static int setup_sort_list(struct perf_hpp_list *list, char *str,
struct evlist *evlist)
{
@@ -3646,7 +3692,9 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
int ret = 0;
int level = 0;
int next_level = 1;
+ int prev_level = 0;
bool in_group = false;
+ bool prev_was_hpp = false;
do {
tok = str;
@@ -3667,6 +3715,19 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
}
if (*tok) {
+ if (is_hpp_sort_key(tok)) {
+ /* keep output (hpp) sort keys in the same level */
+ if (prev_was_hpp) {
+ bool next_same = (level == next_level);
+
+ level = prev_level;
+ next_level = next_same ? level : level + 1;
+ }
+ prev_was_hpp = true;
+ } else {
+ prev_was_hpp = false;
+ }
+
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
@@ -3678,6 +3739,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
ui__error("Unknown --sort key: `%s'", tok);
break;
}
+ prev_level = level;
}
level = next_level;
@@ -3773,10 +3835,24 @@ static char *setup_overhead(char *keys)
if (sort__mode == SORT_MODE__DIFF)
return keys;
- keys = prefix_if_not_in("overhead", keys);
-
- if (symbol_conf.cumulate_callchain)
- keys = prefix_if_not_in("overhead_children", keys);
+ if (symbol_conf.prefer_latency) {
+ keys = prefix_if_not_in("overhead", keys);
+ keys = prefix_if_not_in("latency", keys);
+ if (symbol_conf.cumulate_callchain) {
+ keys = prefix_if_not_in("overhead_children", keys);
+ keys = prefix_if_not_in("latency_children", keys);
+ }
+ } else if (!keys || (!strstr(keys, "overhead") &&
+ !strstr(keys, "latency"))) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency", keys);
+ keys = prefix_if_not_in("overhead", keys);
+ if (symbol_conf.cumulate_callchain) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency_children", keys);
+ keys = prefix_if_not_in("overhead_children", keys);
+ }
+ }
return keys;
}
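A worked expansion of the non-prefer-latency branch, assuming prefix_if_not_in() prepends "<key>," when the key is absent: with enable_latency and cumulate_callchain both set, keys of "sym" grow as

	/*
	 * "sym"
	 * -> "latency,sym"
	 * -> "overhead,latency,sym"
	 * -> "latency_children,overhead,latency,sym"
	 * -> "overhead_children,latency_children,overhead,latency,sym"
	 */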
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a8572574e168..180d36a2bea3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -72,6 +72,7 @@ enum sort_type {
SORT_ANNOTATE_DATA_TYPE_OFFSET,
SORT_SYM_OFFSET,
SORT_ANNOTATE_DATA_TYPE_CACHELINE,
+ SORT_PARALLELISM,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -140,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
bool is_strict_order(const char *order);
-int hpp_dimension__add_output(unsigned col);
+int hpp_dimension__add_output(unsigned col, bool implicit);
void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index ba79f73e1cf5..e852ac0d9847 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -929,12 +929,16 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
}
}
-static void uniquify_event_name(struct evsel *counter)
+static void evsel__uniquify_counter(struct evsel *counter)
{
const char *name, *pmu_name;
char *new_name, *config;
int ret;
+ /* No uniquification necessary. */
+ if (!counter->needs_uniquify)
+ return;
+
/* The evsel was already uniquified. */
if (counter->uniquified_name)
return;
@@ -942,19 +946,6 @@ static void uniquify_event_name(struct evsel *counter)
/* Avoid checking to uniquify twice. */
counter->uniquified_name = true;
- /* The evsel has a "name=" config term or is from libpfm. */
- if (counter->use_config_name || counter->is_libpfm_event)
- return;
-
- /* Legacy no PMU event, don't uniquify. */
- if (!counter->pmu ||
- (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW))
- return;
-
- /* A sysfs or json event replacing a legacy event, don't uniquify. */
- if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX)
- return;
-
name = evsel__name(counter);
pmu_name = counter->pmu->name;
/* Already prefixed by the PMU name. */
@@ -993,17 +984,6 @@ static void uniquify_event_name(struct evsel *counter)
}
}
-static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
-{
- return evsel__is_hybrid(evsel) && !config->hybrid_merge;
-}
-
-static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
-{
- if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config))
- uniquify_event_name(counter);
-}
-
/**
* should_skip_zero_count() - Check if the event should print 0 values.
* @config: The perf stat configuration (including aggregation mode).
@@ -1089,7 +1069,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
if (counter->merged_stat)
return;
- uniquify_counter(config, counter);
+ evsel__uniquify_counter(counter);
val = aggr->counts.val;
ena = aggr->counts.ena;
@@ -1670,7 +1650,8 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
print_metric_end(config, os);
}
-static void disable_uniquify(struct evlist *evlist)
+/* Should uniquify be disabled for the evlist? */
+static bool evlist__disable_uniquify(const struct evlist *evlist)
{
struct evsel *counter;
struct perf_pmu *last_pmu = NULL;
@@ -1679,20 +1660,84 @@ static void disable_uniquify(struct evlist *evlist)
evlist__for_each_entry(evlist, counter) {
/* If PMUs vary then uniquify can be useful. */
if (!first && counter->pmu != last_pmu)
- return;
+ return false;
first = false;
if (counter->pmu) {
/* Allow uniquify for uncore PMUs. */
if (!counter->pmu->is_core)
- return;
+ return false;
/* Keep hybrid event names uniquified for clarity. */
if (perf_pmus__num_core_pmus() > 1)
- return;
+ return false;
+ }
+ }
+ return true;
+}
+
+static void evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config)
+{
+ struct evsel *evsel;
+
+ if (counter->merged_stat) {
+ /* Counter won't be shown. */
+ return;
+ }
+
+ if (counter->use_config_name || counter->is_libpfm_event) {
+ /* Original name will be used. */
+ return;
+ }
+
+ if (!config->hybrid_merge && evsel__is_hybrid(counter)) {
+ /* Unique hybrid counters necessary. */
+ counter->needs_uniquify = true;
+ return;
+ }
+
+ if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) {
+ /* Legacy event, don't uniquify. */
+ return;
+ }
+
+ if (counter->pmu && counter->pmu->is_core &&
+ counter->alternate_hw_config != PERF_COUNT_HW_MAX) {
+ /* A sysfs or json event replacing a legacy event, don't uniquify. */
+ return;
+ }
+
+ if (config->aggr_mode == AGGR_NONE) {
+ /* Always unique with no aggregation. */
+ counter->needs_uniquify = true;
+ return;
+ }
+
+ /*
+ * Do other non-merged events in the evlist have the same name? If so
+ * uniquify is necessary.
+ */
+ evlist__for_each_entry(counter->evlist, evsel) {
+ if (evsel == counter || evsel->merged_stat)
+ continue;
+
+ if (evsel__name_is(counter, evsel__name(evsel))) {
+ counter->needs_uniquify = true;
+ return;
}
}
- evlist__for_each_entry_continue(evlist, counter) {
- counter->uniquified_name = true;
+}
+
+static void evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config)
+{
+ struct evsel *counter;
+
+ if (evlist__disable_uniquify(evlist)) {
+ evlist__for_each_entry(evlist, counter)
+ counter->uniquified_name = true;
+ return;
}
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_needs_uniquify(counter, config);
}
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
@@ -1706,7 +1751,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
.first = true,
};
- disable_uniquify(evlist);
+ evlist__set_needs_uniquify(evlist, config);
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index fa8b2a1048ff..d83bda5824d2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -151,6 +151,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
{
struct evsel *cur;
int evsel_ctx = evsel_context(evsel);
+ struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel);
evlist__for_each_entry(evsel->evlist, cur) {
struct perf_stat_aggr *aggr;
@@ -177,7 +178,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
* Except the SW CLOCK events,
* ignore if not the PMU we're looking for.
*/
- if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu))
+ if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur)))
continue;
aggr = &cur->stats->aggr[aggr_idx];
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 7c2ccdcc3fdb..1f7abd8754c7 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -535,7 +535,10 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
return 0;
}
-/* events should have the same name, scale, unit, cgroup but on different PMUs */
+/*
+ * Events should have the same name, scale, unit, cgroup but on different core
+ * PMUs or on different but matching uncore PMUs.
+ */
static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
{
if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
@@ -553,7 +556,13 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
return false;
- return evsel_a->pmu != evsel_b->pmu;
+ if (evsel_a->pmu == evsel_b->pmu || evsel_a->pmu == NULL || evsel_b->pmu == NULL)
+ return false;
+
+ if (evsel_a->pmu->is_core)
+ return evsel_b->pmu->is_core;
+
+ return perf_pmu__name_no_suffix_match(evsel_a->pmu, evsel_b->pmu->name);
}
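Illustrative outcomes of the new rule (event and PMU names are made up for the example, and the name/scale/unit/cgroup checks above are presumed to pass):

	/*
	 * cpu_core/cycles/  vs cpu_atom/cycles/  -> alias (both core PMUs)
	 * uncore_imc_0/...  vs uncore_imc_1/...  -> alias (name matches modulo suffix)
	 * uncore_imc_0/...  vs uncore_cha_0/...  -> not an alias
	 */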
static void evsel__merge_aliases(struct evsel *evsel)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 66fd1249660a..fbf6d0f73af9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -7,6 +7,7 @@
#include <unistd.h>
#include <inttypes.h>
+#include "compress.h"
#include "dso.h"
#include "map.h"
#include "maps.h"
@@ -1173,33 +1174,6 @@ out:
#endif
-static int dso__swap_init(struct dso *dso, unsigned char eidata)
-{
- static unsigned int const endian = 1;
-
- dso__set_needs_swap(dso, DSO_SWAP__NO);
-
- switch (eidata) {
- case ELFDATA2LSB:
- /* We are big endian, DSO is little endian. */
- if (*(unsigned char const *)&endian != 1)
- dso__set_needs_swap(dso, DSO_SWAP__YES);
- break;
-
- case ELFDATA2MSB:
- /* We are little endian, DSO is big endian. */
- if (*(unsigned char const *)&endian != 0)
- dso__set_needs_swap(dso, DSO_SWAP__YES);
- break;
-
- default:
- pr_err("unrecognized DSO data encoding %d\n", eidata);
- return -EINVAL;
- }
-
- return 0;
-}
-
bool symsrc__possibly_runtime(struct symsrc *ss)
{
return ss->dynsym || ss->opdsec;
@@ -1228,6 +1202,81 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
ehdr.e_type == ET_DYN;
}
+static Elf *read_gnu_debugdata(struct dso *dso, Elf *elf, const char *name, int *fd_ret)
+{
+ Elf *elf_embedded;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Scn *scn;
+ Elf_Data *scn_data;
+ FILE *wrapped;
+ size_t shndx;
+ char temp_filename[] = "/tmp/perf.gnu_debugdata.elf.XXXXXX";
+ int ret, temp_fd;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ return NULL;
+ }
+
+ scn = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debugdata", &shndx);
+ if (!scn) {
+ *dso__load_errno(dso) = -ENOENT;
+ return NULL;
+ }
+
+ if (shdr.sh_type == SHT_NOBITS) {
+ pr_debug("%s: .gnu_debugdata of ELF file %s has no data.\n", __func__, name);
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ return NULL;
+ }
+
+ scn_data = elf_rawdata(scn, NULL);
+ if (!scn_data) {
+ pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__,
+ name, elf_errmsg(-1));
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ return NULL;
+ }
+
+ wrapped = fmemopen(scn_data->d_buf, scn_data->d_size, "r");
+ if (!wrapped) {
+ pr_debug("%s: fmemopen: %s\n", __func__, strerror(errno));
+ *dso__load_errno(dso) = -errno;
+ return NULL;
+ }
+
+ temp_fd = mkstemp(temp_filename);
+ if (temp_fd < 0) {
+ pr_debug("%s: mkstemp: %s\n", __func__, strerror(errno));
+ *dso__load_errno(dso) = -errno;
+ fclose(wrapped);
+ return NULL;
+ }
+ unlink(temp_filename);
+
+ ret = lzma_decompress_stream_to_file(wrapped, temp_fd);
+ fclose(wrapped);
+ if (ret < 0) {
+ *dso__load_errno(dso) = -errno;
+ close(temp_fd);
+ return NULL;
+ }
+
+ elf_embedded = elf_begin(temp_fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (!elf_embedded) {
+ pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__,
+ name, elf_errmsg(-1));
+ *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+ close(temp_fd);
+ return NULL;
+ }
+ pr_debug("%s: using .gnu_debugdata of %s\n", __func__, name);
+ *fd_ret = temp_fd;
+ return elf_embedded;
+}
+
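The mkstemp()+unlink() pair above is the usual anonymous temp-file idiom: the path disappears immediately, but the descriptor keeps the decompressed image alive until the last close(), so every error path cleans up for free. In minimal form (template name illustrative):

	char tmpl[] = "/tmp/example.XXXXXX";
	int fd = mkstemp(tmpl);

	if (fd >= 0)
		unlink(tmpl);	/* inode persists only while fd stays open */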
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
enum dso_binary_type type)
{
@@ -1256,6 +1305,19 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
goto out_close;
}
+ if (type == DSO_BINARY_TYPE__GNU_DEBUGDATA) {
+ int new_fd;
+ Elf *embedded = read_gnu_debugdata(dso, elf, name, &new_fd);
+
+ if (!embedded)
+ goto out_close;
+
+ elf_end(elf);
+ close(fd);
+ fd = new_fd;
+ elf = embedded;
+ }
+
if (gelf_getehdr(elf, &ehdr) == NULL) {
*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
pr_debug("%s: cannot get elf header.\n", __func__);
@@ -1854,10 +1916,23 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
kmodule, 1);
if (err < 0)
return err;
- err += nr;
+ nr += err;
}
- return err;
+ /*
+ * The .gnu_debugdata is a special situation: it contains a symbol
+ * table, but the runtime file may also contain dynsym entries which are
+ * not present there. We need to load both.
+ */
+ if (syms_ss->type == DSO_BINARY_TYPE__GNU_DEBUGDATA && runtime_ss->dynsym) {
+ err = dso__load_sym_internal(dso, map, runtime_ss, runtime_ss,
+ kmodule, 1);
+ if (err < 0)
+ return err;
+ nr += err;
+ }
+
+ return nr;
}
static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 49b08adc6ee3..11540219481b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -18,6 +18,7 @@
#include "annotate.h"
#include "build-id.h"
#include "cap.h"
+#include "cpumap.h"
#include "dso.h"
#include "util.h" // lsdir()
#include "debug.h"
@@ -84,6 +85,7 @@ static enum dso_binary_type binary_type_symtab[] = {
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__GNU_DEBUGDATA,
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
DSO_BINARY_TYPE__GUEST_KMODULE,
DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
@@ -1716,6 +1718,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
return !kmod && dso__kernel(dso) == DSO_SPACE__USER;
case DSO_BINARY_TYPE__KALLSYMS:
@@ -2471,6 +2474,36 @@ int symbol__annotation_init(void)
return 0;
}
+static int setup_parallelism_bitmap(void)
+{
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+ int i, err = -1;
+
+ if (symbol_conf.parallelism_list_str == NULL)
+ return 0;
+
+ map = perf_cpu_map__new(symbol_conf.parallelism_list_str);
+ if (map == NULL) {
+ pr_err("failed to parse parallelism filter list\n");
+ return -1;
+ }
+
+ bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) {
+ pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu);
+ goto out_delete_map;
+ }
+ __clear_bit(cpu.cpu, symbol_conf.parallelism_filter);
+ }
+
+ err = 0;
+out_delete_map:
+ perf_cpu_map__put(map);
+ return err;
+}
+
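The bitmap encodes the filter inverted: bitmap_fill() marks every level hidden, then each requested level is cleared, so test_bit() in hist_entry__parallelism_filter() is non-zero exactly for entries to hide. A sketch of the resulting semantics:

	/*
	 * parallelism_list_str = "1,4-8"
	 *   bitmap_fill()                     -> all levels filtered out
	 *   __clear_bit(1), __clear_bit(4..8) -> these levels kept
	 */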
int symbol__init(struct perf_env *env)
{
const char *symfs;
@@ -2490,6 +2523,9 @@ int symbol__init(struct perf_env *env)
return -1;
}
+ if (setup_parallelism_bitmap())
+ return -1;
+
if (setup_list(&symbol_conf.dso_list,
symbol_conf.dso_list_str, "dso") < 0)
return -1;
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index a9c51acc722f..cd9aa82c7d5a 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -3,6 +3,8 @@
#define __PERF_SYMBOL_CONF 1
#include <stdbool.h>
+#include <linux/bitmap.h>
+#include "perf.h"
struct strlist;
struct intlist;
@@ -47,7 +49,9 @@ struct symbol_conf {
keep_exited_threads,
annotate_data_member,
annotate_data_sample,
- skip_empty;
+ skip_empty,
+ enable_latency,
+ prefer_latency;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
@@ -62,6 +66,7 @@ struct symbol_conf {
*pid_list_str,
*tid_list_str,
*sym_list_str,
+ *parallelism_list_str,
*col_width_list_str,
*bt_stop_list_str;
const char *addr2line_path;
@@ -82,6 +87,7 @@ struct symbol_conf {
int pad_output_len_dso;
int group_sort_idx;
int addr_range;
+ DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1);
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 6923b0d5efed..2fc4d0537840 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -38,6 +38,7 @@
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -767,10 +768,10 @@ static int __event__synthesize_thread(union perf_event *comm_event,
bool needs_mmap, bool mmap_data)
{
char filename[PATH_MAX];
- struct dirent **dirent;
+ struct io_dir iod;
+ struct io_dirent64 *dent;
pid_t tgid, ppid;
int rc = 0;
- int i, n;
/* special case: only send one comm event using passed in pid */
if (!full) {
@@ -802,16 +803,19 @@ static int __event__synthesize_thread(union perf_event *comm_event,
snprintf(filename, sizeof(filename), "%s/proc/%d/task",
machine->root_dir, pid);
- n = scandir(filename, &dirent, filter_task, NULL);
- if (n < 0)
- return n;
+ io_dir__init(&iod, open(filename, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (iod.dirfd < 0)
+ return -1;
- for (i = 0; i < n; i++) {
+ while ((dent = io_dir__readdir(&iod)) != NULL) {
char *end;
pid_t _pid;
bool kernel_thread = false;
- _pid = strtol(dirent[i]->d_name, &end, 10);
+ if (!isdigit(dent->d_name[0]))
+ continue;
+
+ _pid = strtol(dent->d_name, &end, 10);
if (*end)
continue;
@@ -845,9 +849,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
}
}
- for (i = 0; i < n; i++)
- zfree(&dirent[i]);
- free(dirent);
+ close(iod.dirfd);
return rc;
}
@@ -1508,9 +1510,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
if (type & PERF_SAMPLE_REGS_USER) {
- if (sample->user_regs.abi) {
+ if (sample->user_regs && sample->user_regs->abi) {
result += sizeof(u64);
- sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs->mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -1536,9 +1538,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
result += sizeof(u64);
if (type & PERF_SAMPLE_REGS_INTR) {
- if (sample->intr_regs.abi) {
+ if (sample->intr_regs && sample->intr_regs->abi) {
result += sizeof(u64);
- sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs->mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -1707,10 +1709,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_REGS_USER) {
- if (sample->user_regs.abi) {
- *array++ = sample->user_regs.abi;
- sz = hweight64(sample->user_regs.mask) * sizeof(u64);
- memcpy(array, sample->user_regs.regs, sz);
+ if (sample->user_regs && sample->user_regs->abi) {
+ *array++ = sample->user_regs->abi;
+ sz = hweight64(sample->user_regs->mask) * sizeof(u64);
+ memcpy(array, sample->user_regs->regs, sz);
array = (void *)array + sz;
} else {
*array++ = 0;
@@ -1743,10 +1745,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_REGS_INTR) {
- if (sample->intr_regs.abi) {
- *array++ = sample->intr_regs.abi;
- sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
- memcpy(array, sample->intr_regs.regs, sz);
+ if (sample->intr_regs && sample->intr_regs->abi) {
+ *array++ = sample->intr_regs->abi;
+ sz = hweight64(sample->intr_regs->mask) * sizeof(u64);
+ memcpy(array, sample->intr_regs->regs, sz);
array = (void *)array + sz;
} else {
*array++ = 0;
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 928aca4cd6e9..67a8ec10e9e4 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -7,119 +7,127 @@
#include "syscalltbl.h"
#include <stdlib.h>
+#include <asm/bitsperlong.h>
#include <linux/compiler.h>
+#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <string.h>
#include "string2.h"
-#include <syscall_table.h>
-const int syscalltbl_native_max_id = SYSCALLTBL_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl;
+#include "trace/beauty/generated/syscalltbl.c"
-struct syscall {
- int id;
- const char *name;
-};
-
-static int syscallcmpname(const void *vkey, const void *ventry)
-{
- const char *key = vkey;
- const struct syscall *entry = ventry;
-
- return strcmp(key, entry->name);
-}
-
-static int syscallcmp(const void *va, const void *vb)
+static const struct syscalltbl *find_table(int e_machine)
{
- const struct syscall *a = va, *b = vb;
+ static const struct syscalltbl *last_table;
+ static int last_table_machine = EM_NONE;
- return strcmp(a->name, b->name);
-}
+ /* Tables only exist for EM_SPARC. */
+ if (e_machine == EM_SPARCV9)
+ e_machine = EM_SPARC;
-static int syscalltbl__init_native(struct syscalltbl *tbl)
-{
- int nr_entries = 0, i, j;
- struct syscall *entries;
+ if (last_table_machine == e_machine && last_table != NULL)
+ return last_table;
- for (i = 0; i <= syscalltbl_native_max_id; ++i)
- if (syscalltbl_native[i])
- ++nr_entries;
+ for (size_t i = 0; i < ARRAY_SIZE(syscalltbls); i++) {
+ const struct syscalltbl *entry = &syscalltbls[i];
- entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
- if (tbl->syscalls.entries == NULL)
- return -1;
+ if (entry->e_machine != e_machine && entry->e_machine != EM_NONE)
+ continue;
- for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
- if (syscalltbl_native[i]) {
- entries[j].name = syscalltbl_native[i];
- entries[j].id = i;
- ++j;
- }
+ last_table = entry;
+ last_table_machine = e_machine;
+ return entry;
}
-
- qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
- tbl->syscalls.nr_entries = nr_entries;
- tbl->syscalls.max_id = syscalltbl_native_max_id;
- return 0;
+ return NULL;
}
-struct syscalltbl *syscalltbl__new(void)
+const char *syscalltbl__name(int e_machine, int id)
{
- struct syscalltbl *tbl = malloc(sizeof(*tbl));
- if (tbl) {
- if (syscalltbl__init_native(tbl)) {
- free(tbl);
- return NULL;
- }
+ const struct syscalltbl *table = find_table(e_machine);
+
+ if (e_machine == EM_MIPS && id > 1000) {
+ /*
+ * MIPS may encode the N32/64/O32 type in the high part of
+ * syscall number. Mask this off if present. See the values of
+ * __NR_N32_Linux, __NR_64_Linux, __NR_O32_Linux and __NR_Linux.
+ */
+ id = id % 1000;
}
- return tbl;
+ if (table && id >= 0 && id < table->num_to_name_len)
+ return table->num_to_name[id];
+ return NULL;
}
-void syscalltbl__delete(struct syscalltbl *tbl)
+struct syscall_cmp_key {
+ const char *name;
+ const char *const *tbl;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
{
- zfree(&tbl->syscalls.entries);
- free(tbl);
+ const struct syscall_cmp_key *key = vkey;
+ const uint16_t *entry = ventry;
+
+ return strcmp(key->name, key->tbl[*entry]);
}
-const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+int syscalltbl__id(int e_machine, const char *name)
{
- return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL;
+ const struct syscalltbl *table = find_table(e_machine);
+ struct syscall_cmp_key key;
+ const uint16_t *id;
+
+ if (!table)
+ return -1;
+
+ key.name = name;
+ key.tbl = table->num_to_name;
+ id = bsearch(&key, table->sorted_names, table->sorted_names_len,
+ sizeof(table->sorted_names[0]), syscallcmpname);
+
+ return id ? *id : -1;
}
-int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+int syscalltbl__num_idx(int e_machine)
{
- struct syscall *sc = bsearch(name, tbl->syscalls.entries,
- tbl->syscalls.nr_entries, sizeof(*sc),
- syscallcmpname);
+ const struct syscalltbl *table = find_table(e_machine);
+
+ if (!table)
+ return 0;
- return sc ? sc->id : -1;
+ return table->sorted_names_len;
}
-int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx)
+int syscalltbl__id_at_idx(int e_machine, int idx)
{
- struct syscall *syscalls = tbl->syscalls.entries;
+ const struct syscalltbl *table = find_table(e_machine);
- return idx < tbl->syscalls.nr_entries ? syscalls[idx].id : -1;
+ if (!table)
+ return -1;
+
+ assert(idx >= 0 && idx < table->sorted_names_len);
+ return table->sorted_names[idx];
}
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx)
{
- int i;
- struct syscall *syscalls = tbl->syscalls.entries;
+ const struct syscalltbl *table = find_table(e_machine);
+
+ for (int i = *idx + 1; table && i < table->sorted_names_len; ++i) {
+ const char *name = table->num_to_name[table->sorted_names[i]];
- for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
- if (strglobmatch(syscalls[i].name, syscall_glob)) {
+ if (strglobmatch(name, syscall_glob)) {
*idx = i;
- return syscalls[i].id;
+ return table->sorted_names[i];
}
}
return -1;
}
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx)
{
*idx = -1;
- return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+ return syscalltbl__strglobmatch_next(e_machine, syscall_glob, idx);
}
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 362411a6d849..2bb628eff367 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -2,22 +2,12 @@
#ifndef __PERF_SYSCALLTBL_H
#define __PERF_SYSCALLTBL_H
-struct syscalltbl {
- struct {
- int max_id;
- int nr_entries;
- void *entries;
- } syscalls;
-};
+const char *syscalltbl__name(int e_machine, int id);
+int syscalltbl__id(int e_machine, const char *name);
+int syscalltbl__num_idx(int e_machine);
+int syscalltbl__id_at_idx(int e_machine, int idx);
-struct syscalltbl *syscalltbl__new(void);
-void syscalltbl__delete(struct syscalltbl *tbl);
-
-const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
-int syscalltbl__id(struct syscalltbl *tbl, const char *name);
-int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx);
-
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx);
#endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 0ffdd52d86d7..89585f53c1d5 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
#include <errno.h>
+#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -16,6 +18,7 @@
#include "symbol.h"
#include "unwind.h"
#include "callchain.h"
+#include "dwarf-regs.h"
#include <api/fs/fs.h>
@@ -51,6 +54,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread__set_ppid(thread, -1);
thread__set_cpu(thread, -1);
thread__set_guest_cpu(thread, -1);
+ thread__set_e_machine(thread, EM_NONE);
thread__set_lbr_stitch_enable(thread, false);
INIT_LIST_HEAD(thread__namespaces_list(thread));
INIT_LIST_HEAD(thread__comm_list(thread));
@@ -423,6 +427,82 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
}
}
+static uint16_t read_proc_e_machine_for_pid(pid_t pid)
+{
+ char path[6 /* "/proc/" */ + 11 /* max length of pid */ + 5 /* "/exe\0" */];
+ int fd;
+ uint16_t e_machine = EM_NONE;
+
+ snprintf(path, sizeof(path), "/proc/%d/exe", pid);
+ fd = open(path, O_RDONLY);
+ if (fd >= 0) {
+ _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset");
+ _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
+ if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine))
+ e_machine = EM_NONE;
+ close(fd);
+ }
+ return e_machine;
+}
+
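The hard-coded offset 18 works because the ELF header layout is identical up to e_machine in both classes, which the two _Static_asserts pin down:

	/*
	 * unsigned char e_ident[EI_NIDENT];   bytes  0..15 (both classes)
	 * Elf32_Half / Elf64_Half e_type;     bytes 16..17
	 * Elf32_Half / Elf64_Half e_machine;  bytes 18..19
	 */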
+static int thread__e_machine_callback(struct map *map, void *machine)
+{
+ struct dso *dso = map__dso(map);
+
+ _Static_assert(0 == EM_NONE, "Unexpected EM_NONE");
+ if (!dso)
+ return EM_NONE;
+
+ return dso__e_machine(dso, machine);
+}
+
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine)
+{
+ pid_t tid, pid;
+ uint16_t e_machine = RC_CHK_ACCESS(thread)->e_machine;
+
+ if (e_machine != EM_NONE)
+ return e_machine;
+
+ tid = thread__tid(thread);
+ pid = thread__pid(thread);
+ if (pid != tid) {
+ struct thread *parent = machine__findnew_thread(machine, pid, pid);
+
+ if (parent) {
+ e_machine = thread__e_machine(parent, machine);
+ thread__set_e_machine(thread, e_machine);
+ return e_machine;
+ }
+ /* Something went wrong, fallback. */
+ }
+ /* Reading on the PID thread. First try to find from the maps. */
+ e_machine = maps__for_each_map(thread__maps(thread),
+ thread__e_machine_callback,
+ machine);
+ if (e_machine == EM_NONE) {
+ /* Maps failed, perhaps we're live with map events disabled. */
+ bool is_live = machine->machines == NULL;
+
+ if (!is_live) {
+ /* Check if the session has a data file. */
+ struct perf_session *session = container_of(machine->machines,
+ struct perf_session,
+ machines);
+
+ is_live = !!session->data;
+ }
+ /* Read from /proc/pid/exe if live. */
+ if (is_live)
+ e_machine = read_proc_e_machine_for_pid(pid);
+ }
+ if (e_machine != EM_NONE)
+ thread__set_e_machine(thread, e_machine);
+ else
+ e_machine = EM_HOST;
+ return e_machine;
+}
+
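Summarizing the resolution order implemented above (a reading of the code, nothing new):

	/*
	 * 1. cached thread->e_machine, if already computed;
	 * 2. the group leader's value, when tid != pid;
	 * 3. the ELF header of any dso mapped into the thread;
	 * 4. /proc/<pid>/exe, but only for a live session;
	 * 5. EM_HOST as the fallback, returned without being cached.
	 */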
struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
{
if (thread__pid(thread) == thread__tid(thread))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 6cbf6eb2812e..cd574a896418 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -60,7 +60,11 @@ DECLARE_RC_STRUCT(thread) {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
-
+ /**
+ * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not
+ * computed.
+ */
+ uint16_t e_machine;
/* LBR call stack stitch */
bool lbr_stitch_enable;
struct lbr_stitch *lbr_stitch;
@@ -302,6 +306,14 @@ static inline void thread__set_filter_entry_depth(struct thread *thread, int dep
RC_CHK_ACCESS(thread)->filter_entry_depth = depth;
}
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine);
+
+static inline void thread__set_e_machine(struct thread *thread, uint16_t e_machine)
+{
+ RC_CHK_ACCESS(thread)->e_machine = e_machine;
+}
+
+
static inline bool thread__lbr_stitch_enable(const struct thread *thread)
{
return RC_CHK_ACCESS(thread)->lbr_stitch_enable;
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index 4fb097578479..97b327d1ce4a 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -62,7 +62,8 @@ int tool_pmu__num_skip_events(void)
const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
- if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX)
+ if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
+ !tool_pmu__skip_event(tool_pmu__event_names[ev]))
return tool_pmu__event_names[ev];
return NULL;
@@ -354,6 +355,7 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
if (online) {
*result = perf_cpu_map__nr(online);
+ perf_cpu_map__put(online);
return true;
}
return false;
@@ -489,17 +491,24 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
return 0;
}
-struct perf_pmu *perf_pmus__tool_pmu(void)
+struct perf_pmu *tool_pmu__new(void)
{
- static struct perf_pmu tool = {
- .name = "tool",
- .type = PERF_PMU_TYPE_TOOL,
- .aliases = LIST_HEAD_INIT(tool.aliases),
- .caps = LIST_HEAD_INIT(tool.caps),
- .format = LIST_HEAD_INIT(tool.format),
- };
- if (!tool.events_table)
- tool.events_table = find_core_events_table("common", "common");
-
- return &tool;
+ struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
+
+ if (!tool)
+ goto out;
+ tool->name = strdup("tool");
+ if (!tool->name) {
+ zfree(&tool);
+ goto out;
+ }
+
+ tool->type = PERF_PMU_TYPE_TOOL;
+ INIT_LIST_HEAD(&tool->aliases);
+ INIT_LIST_HEAD(&tool->caps);
+ INIT_LIST_HEAD(&tool->format);
+ tool->events_table = find_core_events_table("common", "common");
+
+out:
+ return tool;
}
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index a60184859080..c6ad1dd90a56 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -51,6 +51,6 @@ int evsel__tool_pmu_open(struct evsel *evsel,
int start_cpu_map_idx, int end_cpu_map_idx);
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread);
-struct perf_pmu *perf_pmus__tool_pmu(void);
+struct perf_pmu *tool_pmu__new(void);
#endif /* __TOOL_PMU_H */
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 4e81e02a4f18..72abb28b7b5a 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -309,53 +309,107 @@ static const struct {
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
- {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"},
{0, NULL}
};
-static const char *sample_flags_to_name(u32 flags)
+static const struct {
+ u32 flags;
+ const char *name;
+} branch_events[] = {
+ {PERF_IP_FLAG_BRANCH_MISS, "miss"},
+ {PERF_IP_FLAG_NOT_TAKEN, "not_taken"},
+ {0, NULL}
+};
+
+static int sample_flags_to_name(u32 flags, char *str, size_t size)
{
int i;
+ const char *prefix;
+ int pos = 0, ret, ev_idx = 0;
+ u32 xf = flags & PERF_ADDITIONAL_STATE_MASK;
+ u32 types, events;
+ char xs[16] = { 0 };
+
+ /* Clear additional state bits */
+ flags &= ~PERF_ADDITIONAL_STATE_MASK;
+
+ if (flags & PERF_IP_FLAG_TRACE_BEGIN)
+ prefix = "tr strt ";
+ else if (flags & PERF_IP_FLAG_TRACE_END)
+ prefix = "tr end ";
+ else
+ prefix = "";
+
+ ret = snprintf(str + pos, size - pos, "%s", prefix);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+
+ flags &= ~(PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END);
+
+ types = flags & ~PERF_IP_FLAG_BRANCH_EVENT_MASK;
+ for (i = 0; sample_flags[i].name; i++) {
+ if (sample_flags[i].flags != types)
+ continue;
+
+ ret = snprintf(str + pos, size - pos, "%s", sample_flags[i].name);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ break;
+ }
- for (i = 0; sample_flags[i].name ; i++) {
- if (sample_flags[i].flags == flags)
- return sample_flags[i].name;
+ events = flags & PERF_IP_FLAG_BRANCH_EVENT_MASK;
+ for (i = 0; branch_events[i].name; i++) {
+ if (!(branch_events[i].flags & events))
+ continue;
+
+ ret = snprintf(str + pos, size - pos, !ev_idx ? "/%s" : ",%s",
+ branch_events[i].name);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ ev_idx++;
}
- return NULL;
+ /* Add an end character '/' for events */
+ if (ev_idx) {
+ ret = snprintf(str + pos, size - pos, "/");
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ }
+
+ if (!xf)
+ return pos;
+
+ snprintf(xs, sizeof(xs), "(%s%s%s)",
+ xf & PERF_IP_FLAG_IN_TX ? "x" : "",
+ xf & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
+ xf & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
+
+ /* Right align the string if its length is less than the limit */
+ if ((pos + strlen(xs)) < SAMPLE_FLAGS_STR_ALIGNED_SIZE)
+ ret = snprintf(str + pos, size - pos, "%*s",
+ (int)(SAMPLE_FLAGS_STR_ALIGNED_SIZE - pos), xs);
+ else
+ ret = snprintf(str + pos, size - pos, " %s", xs);
+ if (ret < 0)
+ return ret;
+
+ return pos + ret;
}
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
{
- u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE |
- PERF_IP_FLAG_INTR_TOGGLE;
const char *chars = PERF_IP_FLAG_CHARS;
const size_t n = strlen(PERF_IP_FLAG_CHARS);
- const char *name = NULL;
size_t i, pos = 0;
- char xs[16] = {0};
-
- if (flags & xf)
- snprintf(xs, sizeof(xs), "(%s%s%s)",
- flags & PERF_IP_FLAG_IN_TX ? "x" : "",
- flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
- flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
-
- name = sample_flags_to_name(flags & ~xf);
- if (name)
- return snprintf(str, sz, "%-15s%6s", name, xs);
-
- if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
- name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN));
- if (name)
- return snprintf(str, sz, "tr strt %-7s%6s", name, xs);
- }
+ int ret;
- if (flags & PERF_IP_FLAG_TRACE_END) {
- name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END));
- if (name)
- return snprintf(str, sz, "tr end %-7s%6s", name, xs);
- }
+ ret = sample_flags_to_name(flags, str, sz);
+ if (ret > 0)
+ return ret;
for (i = 0; i < n; i++, flags >>= 1) {
if ((flags & 1) && pos < sz)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index ac9fde2f980c..71e680bc3d4b 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -145,6 +145,8 @@ int common_flags(struct scripting_context *context);
int common_lock_depth(struct scripting_context *context);
#define SAMPLE_FLAGS_BUF_SIZE 64
+#define SAMPLE_FLAGS_STR_ALIGNED_SIZE 21
+
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index 32c39cfe209b..4c6a86e1cb54 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit)
int unit_number__scnprintf(char *buf, size_t size, u64 n)
{
- char unit[4] = "BKMG";
+ char unit[] = "BKMG";
int i = 0;
while (((n / 1024) > 1) && (i < 3)) {
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index bde216e630d2..793d11832694 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -190,7 +190,10 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
int offset;
int ret;
- ret = perf_reg_value(&start, &ui->sample->user_regs,
+ if (!ui->sample->user_regs)
+ return false;
+
+ ret = perf_reg_value(&start, ui->sample->user_regs,
perf_arch_reg_sp(arch));
if (ret)
return false;
@@ -273,7 +276,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
Dwarf_Word ip;
int err = -EINVAL, i;
- if (!data->user_regs.regs)
+ if (!data->user_regs || !data->user_regs->regs)
return -EINVAL;
ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
@@ -286,7 +289,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
if (!ui->dwfl)
goto out;
- err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
+ err = perf_reg_value(&ip, data->user_regs, perf_arch_reg_ip(arch));
if (err)
goto out;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 16c2b03831f3..9fb2c1343c7f 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -330,8 +330,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
int ret, fd;
if (dso__data(dso)->eh_frame_hdr_offset == 0) {
- fd = dso__data_get_fd(dso, ui->machine);
- if (fd < 0)
+ if (!dso__data_get_fd(dso, ui->machine, &fd))
return -EINVAL;
/* Check the .eh_frame section for unwinding info */
@@ -372,8 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
* has to be pointed by symsrc_filename
*/
if (ofs == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd >= 0) {
+ if (dso__data_get_fd(dso, machine, &fd) {
ofs = elf_section_offset(fd, ".debug_frame");
dso__data_put_fd(dso);
}
@@ -485,14 +483,16 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
/* Check the .debug_frame section for unwinding info */
if (ret < 0 &&
!read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
- int fd = dso__data_get_fd(dso, ui->machine);
- int is_exec = elf_is_exec(fd, dso__name(dso));
+ int fd;
u64 start = map__start(map);
- unw_word_t base = is_exec ? 0 : start;
+ unw_word_t base = start;
const char *symfile;
- if (fd >= 0)
+ if (dso__data_get_fd(dso, ui->machine, &fd)) {
+ if (elf_is_exec(fd, dso__name(dso)))
+ base = 0;
dso__data_put_fd(dso);
+ }
symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
@@ -579,12 +579,12 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
int ret;
/* Don't support write, probably not needed. */
- if (__write || !stack || !ui->sample->user_regs.regs) {
+ if (__write || !stack || !ui->sample->user_regs || !ui->sample->user_regs->regs) {
*valp = 0;
return 0;
}
- ret = perf_reg_value(&start, &ui->sample->user_regs,
+ ret = perf_reg_value(&start, perf_sample__user_regs(ui->sample),
perf_arch_reg_sp(arch));
if (ret)
return ret;
@@ -628,7 +628,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
return 0;
}
- if (!ui->sample->user_regs.regs) {
+ if (!ui->sample->user_regs || !ui->sample->user_regs->regs) {
*valp = 0;
return 0;
}
@@ -637,7 +637,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
if (id < 0)
return -EINVAL;
- ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample), id);
if (ret) {
if (!ui->best_effort)
pr_err("unwind: can't read reg %d\n", regnum);
@@ -741,7 +741,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
unw_cursor_t c;
int ret, i = 0;
- ret = perf_reg_value(&val, &ui->sample->user_regs,
+ ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample),
perf_arch_reg_ip(arch));
if (ret)
return ret;
@@ -808,7 +808,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.best_effort = best_effort
};
- if (!data->user_regs.regs)
+ if (!data->user_regs || !data->user_regs->regs)
return -EINVAL;
if (max_stack <= 0)