summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-31 08:52:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-31 08:52:33 -0700
commit802f0d58d52e8e34e08718479475ccdff0caffa0 (patch)
tree305f3be98d12b0c6881a6c59eb92e795e6088e51 /tools/perf/util
parent4e82c87058f45e79eeaa4d5bcc3b38dd3dce7209 (diff)
parent35d13f841a3d8159ef20d5e32a9ed3faa27875bc (diff)
downloadlwn-802f0d58d52e8e34e08718479475ccdff0caffa0.tar.gz
lwn-802f0d58d52e8e34e08718479475ccdff0caffa0.zip
Merge tag 'perf-tools-for-v6.15-2025-03-27' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "perf record: - Introduce latency profiling using scheduler information. The latency profiling is to show impacts on wall-time rather than cpu-time. By tracking context switches, it can weight samples and find which part of the code contributed more to the execution latency. The value (period) of the sample is weighted by dividing it by the number of parallel executions at the moment. The parallelism is tracked in perf report with sched-switch records. This will reduce the portion that is run in parallel and in turn increase the portion of serial executions. For now, it's limited to profile processes, IOW system-wide profiling is not supported. You can add --latency option to enable this. $ perf record --latency -- make -C tools/perf I've run the above command for perf build which adds -j option to make with the number of CPUs in the system internally. Normally it'd show something like below: $ perf report -F overhead,comm ... # # Overhead Command # ........ ............... # 78.97% cc1 6.54% python3 4.21% shellcheck 3.28% ld 1.80% as 1.37% cc1plus 0.80% sh 0.62% clang 0.56% gcc 0.44% perl 0.39% make ... The cc1 takes around 80% of the overhead as it's the actual compiler. However it runs in parallel so its contribution to latency may be less than that. Now, perf report will show both overhead and latency (if --latency was given at record time) like below: $ perf report -s comm ... # # Overhead Latency Command # ........ ........ ............... # 78.97% 48.66% cc1 6.54% 25.68% python3 4.21% 0.39% shellcheck 3.28% 13.70% ld 1.80% 2.56% as 1.37% 3.08% cc1plus 0.80% 0.98% sh 0.62% 0.61% clang 0.56% 0.33% gcc 0.44% 1.71% perl 0.39% 0.83% make ... You can see latency of cc1 goes down to around 50% and python3 and ld contribute a lot more than their overhead. You can use --latency option in perf report to get the same result but ordered by latency. 
$ perf report --latency -s comm perf report: - As a side effect of the latency profiling work, it adds a new output field 'latency' and a sort key 'parallelism'. The below is a result from my system with 64 CPUs. The build was well-parallelized but contained some serial portions. $ perf report -s parallelism ... # # Overhead Latency Parallelism # ........ ........ ........... # 16.95% 1.54% 62 13.38% 1.24% 61 12.50% 70.47% 1 11.81% 1.06% 63 7.59% 0.71% 60 4.33% 12.20% 2 3.41% 0.33% 59 2.05% 0.18% 64 1.75% 1.09% 9 1.64% 1.85% 5 ... - Support Fedora mini-debuginfo which is an LZMA compressed symbol table inside ".gnu_debugdata" ELF section. perf annotate: - Add --code-with-type option to enable data-type profiling with the usual annotate output. Instead of focusing on data structure, it shows code annotation together with data type it accesses in case the instruction refers to a memory location (and it was able to resolve the target data type). Currently it only works with --stdio. $ perf annotate --stdio --code-with-type ... 
Percent | Source code & Disassembly of vmlinux for cpu/mem-loads,ldlat=30/pp (18 samples, percent: local period) ---------------------------------------------------------------------------------------------------------------------- : 0 0xffffffff81050610 <__fdget>: 0.00 : ffffffff81050610: callq 0xffffffff81c01b80 <__fentry__> # data-type: (stack operation) 0.00 : ffffffff81050615: pushq %rbp # data-type: (stack operation) 0.00 : ffffffff81050616: movq %rsp, %rbp 0.00 : ffffffff81050619: pushq %r15 # data-type: (stack operation) 0.00 : ffffffff8105061b: pushq %r14 # data-type: (stack operation) 0.00 : ffffffff8105061d: pushq %rbx # data-type: (stack operation) 0.00 : ffffffff8105061e: subq $0x10, %rsp 0.00 : ffffffff81050622: movl %edi, %ebx 0.00 : ffffffff81050624: movq %gs:0x7efc4814(%rip), %rax # 0x14e40 <current_task> # data-type: struct task_struct* +0 0.00 : ffffffff8105062c: movq 0x8d0(%rax), %r14 # data-type: struct task_struct +0x8d0 (files) 0.00 : ffffffff81050633: movl (%r14), %eax # data-type: struct files_struct +0 (count.counter) 0.00 : ffffffff81050636: cmpl $0x1, %eax 0.00 : ffffffff81050639: je 0xffffffff810506a9 <__fdget+0x99> 0.00 : ffffffff8105063b: movq 0x20(%r14), %rcx # data-type: struct files_struct +0x20 (fdt) 0.00 : ffffffff8105063f: movl (%rcx), %eax # data-type: struct fdtable +0 (max_fds) 0.00 : ffffffff81050641: cmpl %ebx, %eax 0.00 : ffffffff81050643: jbe 0xffffffff810506ef <__fdget+0xdf> 0.00 : ffffffff81050649: movl %ebx, %r15d 5.56 : ffffffff8105064c: movq 0x8(%rcx), %rdx # data-type: struct fdtable +0x8 (fd) ... The "# data-type:" part was added with this change. The first few entries are not very interesting. But later you can see it accesses a couple of fields in the task_struct, files_struct and fdtable. perf trace: - Support syscall tracing for different ABI. For example it can trace system calls for 32-bit applications on 64-bit kernel transparently. - Add --summary-mode=total option to show global syscall summary. 
The default is 'thread' to show per-thread syscall summary. Python support: - Add more interfaces to 'perf' module to parse events, and config, enable or disable the event list properly so that it can implement basic functionalities purely in Python. There is an example code for these new interfaces in python/tracepoint.py. - Add mypy and pylint support to enable build time checking. Fix some code based on the findings from these tools. Internals: - Introduce io_dir__readdir() API to make directory traversal (usually for proc or sysfs) efficient with less memory footprint. JSON vendor events: - Add events and metrics for ARM Neoverse N3 and V3 - Update events and metrics on various Intel CPUs - Add/update events for a number of SiFive processors" * tag 'perf-tools-for-v6.15-2025-03-27' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (229 commits) perf bpf-filter: Fix a parsing error with comma perf report: Fix a memory leak for perf_env on AMD perf trace: Fix wrong size to bpf_map__update_elem call perf tools: annotate asm_pure_loop.S perf python: Fix setup.py mypy errors perf test: Address attr.py mypy error perf build: Add pylint build tests perf build: Add mypy build tests perf build: Rename TEST_LOGS to SHELL_TEST_LOGS tools/build: Don't pass test log files to linker perf bench sched pipe: fix enforced blocking reads in worker_thread perf tools: Fix is_compat_mode build break in ppc64 perf build: filter all combinations of -flto for libperl perf vendor events arm64 AmpereOneX: Fix frontend_bound calculation perf vendor events arm64: AmpereOne/AmpereOneX: Mark LD_RETIRED impacted by errata perf trace: Fix evlist memory leak perf trace: Fix BTF memory leak perf trace: Make syscall table stable perf syscalltbl: Mask off ABI type for MIPS system calls perf build: Remove Makefile.syscalls ...
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build32
-rw-r--r--tools/perf/util/addr_location.c1
-rw-r--r--tools/perf/util/addr_location.h6
-rw-r--r--tools/perf/util/annotate-data.c49
-rw-r--r--tools/perf/util/annotate-data.h13
-rw-r--r--tools/perf/util/annotate.c267
-rw-r--r--tools/perf/util/annotate.h9
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c23
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h11
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c14
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h12
-rw-r--r--tools/perf/util/arm-spe.c167
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.c29
-rw-r--r--tools/perf/util/auxtrace.c15
-rw-r--r--tools/perf/util/bpf-filter.l2
-rw-r--r--tools/perf/util/bpf_ftrace.c14
-rw-r--r--tools/perf/util/bpf_lock_contention.c85
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c28
-rw-r--r--tools/perf/util/bpf_skel/kwork_trace.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c245
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h7
-rw-r--r--tools/perf/util/branch.h3
-rw-r--r--tools/perf/util/callchain.c10
-rw-r--r--tools/perf/util/color.h5
-rw-r--r--tools/perf/util/color_config.c11
-rw-r--r--tools/perf/util/comm.c2
-rw-r--r--tools/perf/util/compress.h20
-rw-r--r--tools/perf/util/config.c6
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/cpumap.c70
-rw-r--r--tools/perf/util/cs-etm.c31
-rw-r--r--tools/perf/util/data.c20
-rw-r--r--tools/perf/util/data.h1
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/debuginfo.c6
-rw-r--r--tools/perf/util/disasm.c27
-rw-r--r--tools/perf/util/dso.c166
-rw-r--r--tools/perf/util/dso.h82
-rw-r--r--tools/perf/util/env.c2
-rw-r--r--tools/perf/util/event.c11
-rw-r--r--tools/perf/util/event.h12
-rw-r--r--tools/perf/util/events_stats.h2
-rw-r--r--tools/perf/util/evlist.c32
-rw-r--r--tools/perf/util/evlist.h1
-rw-r--r--tools/perf/util/evsel.c51
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/expr.c2
-rw-r--r--tools/perf/util/ftrace.h2
-rw-r--r--tools/perf/util/header.c33
-rw-r--r--tools/perf/util/hist.c108
-rw-r--r--tools/perf/util/hist.h32
-rw-r--r--tools/perf/util/hwmon_pmu.c56
-rw-r--r--tools/perf/util/hwmon_pmu.h16
-rw-r--r--tools/perf/util/intel-bts.c4
-rw-r--r--tools/perf/util/intel-pt.c136
-rw-r--r--tools/perf/util/intel-tpebs.c2
-rw-r--r--tools/perf/util/jitdump.c10
-rw-r--r--tools/perf/util/lock-contention.h7
-rw-r--r--tools/perf/util/lzma.c29
-rw-r--r--tools/perf/util/machine.c92
-rw-r--r--tools/perf/util/machine.h8
-rw-r--r--tools/perf/util/maps.c58
-rw-r--r--tools/perf/util/mem-events.c74
-rw-r--r--tools/perf/util/mem-events.h3
-rw-r--r--tools/perf/util/mmap.c15
-rw-r--r--tools/perf/util/mmap.h3
-rw-r--r--tools/perf/util/mutex.h8
-rw-r--r--tools/perf/util/parse-events.c179
-rw-r--r--tools/perf/util/parse-events.l51
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c124
-rw-r--r--tools/perf/util/pmu.c315
-rw-r--r--tools/perf/util/pmu.h12
-rw-r--r--tools/perf/util/pmus.c202
-rw-r--r--tools/perf/util/pmus.h1
-rw-r--r--tools/perf/util/probe-finder.c21
-rw-r--r--tools/perf/util/probe-finder.h1
-rw-r--r--tools/perf/util/pstack.c14
-rw-r--r--tools/perf/util/pstack.h1
-rw-r--r--tools/perf/util/python.c160
-rw-r--r--tools/perf/util/rb_resort.h146
-rw-r--r--tools/perf/util/s390-cpumsf.c6
-rw-r--r--tools/perf/util/sample.c43
-rw-r--r--tools/perf/util/sample.h11
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c29
-rw-r--r--tools/perf/util/session.c106
-rw-r--r--tools/perf/util/session.h1
-rw-r--r--tools/perf/util/setup.py10
-rw-r--r--tools/perf/util/sort.c152
-rw-r--r--tools/perf/util/sort.h3
-rw-r--r--tools/perf/util/stat-display.c111
-rw-r--r--tools/perf/util/stat-shadow.c3
-rw-r--r--tools/perf/util/stat.c13
-rw-r--r--tools/perf/util/symbol-elf.c133
-rw-r--r--tools/perf/util/symbol.c36
-rw-r--r--tools/perf/util/symbol_conf.h8
-rw-r--r--tools/perf/util/synthetic-events.c46
-rw-r--r--tools/perf/util/syscalltbl.c148
-rw-r--r--tools/perf/util/syscalltbl.h22
-rw-r--r--tools/perf/util/thread.c80
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/tool_pmu.c35
-rw-r--r--tools/perf/util/tool_pmu.h2
-rw-r--r--tools/perf/util/trace-event-scripting.c116
-rw-r--r--tools/perf/util/trace-event.h2
-rw-r--r--tools/perf/util/units.c2
-rw-r--r--tools/perf/util/unwind-libdw.c9
-rw-r--r--tools/perf/util/unwind-libunwind-local.c28
107 files changed, 3281 insertions, 1420 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5ec97e8d6b6d..946bce6628f3 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -67,6 +67,7 @@ perf-util-y += maps.o
perf-util-y += pstack.o
perf-util-y += session.o
perf-util-y += tool.o
+perf-util-y += sample.o
perf-util-y += sample-raw.o
perf-util-y += s390-sample-raw.o
perf-util-y += amd-sample-raw.o
@@ -405,14 +406,39 @@ $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
ifdef SHELLCHECK
SHELL_TESTS := generate-cmdlist.sh
- TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+ SHELL_TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
else
SHELL_TESTS :=
- TEST_LOGS :=
+ SHELL_TEST_LOGS :=
endif
$(OUTPUT)%.shellcheck_log: %
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
-perf-util-y += $(TEST_LOGS)
+perf-util-y += $(SHELL_TEST_LOGS)
+
+PY_TESTS := setup.py
+ifdef MYPY
+ MYPY_TEST_LOGS := $(PY_TESTS:%=%.mypy_log)
+else
+ MYPY_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.mypy_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)mypy "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(MYPY_TEST_LOGS)
+
+ifdef PYLINT
+ PYLINT_TEST_LOGS := $(PY_TESTS:%=%.pylint_log)
+else
+ PYLINT_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.pylint_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(PYLINT_TEST_LOGS)
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
index 51825ef8c0ab..007a2f5df9a6 100644
--- a/tools/perf/util/addr_location.c
+++ b/tools/perf/util/addr_location.c
@@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
al->cpumode = 0;
al->cpu = 0;
al->socket = 0;
+ al->parallelism = 1;
}
/*
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
index d8ac0428dff2..64b551025216 100644
--- a/tools/perf/util/addr_location.h
+++ b/tools/perf/util/addr_location.h
@@ -17,10 +17,14 @@ struct addr_location {
const char *srcline;
u64 addr;
char level;
- u8 filtered;
u8 cpumode;
+ u16 filtered;
s32 cpu;
s32 socket;
+ /* Same as machine.parallelism but within [1, nr_cpus]. */
+ int parallelism;
+ /* See he_stat.latency. */
+ u64 latency;
};
void addr_location__init(struct addr_location *al);
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 976abedca09e..1ef2edbc71d9 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -314,6 +314,40 @@ static void delete_members(struct annotated_member *member)
}
}
+static int fill_member_name(char *buf, size_t sz, struct annotated_member *m,
+ int offset, bool first)
+{
+ struct annotated_member *child;
+
+ if (list_empty(&m->children))
+ return 0;
+
+ list_for_each_entry(child, &m->children, node) {
+ int len;
+
+ if (offset < child->offset || offset >= child->offset + child->size)
+ continue;
+
+ /* It can have anonymous struct/union members */
+ if (child->var_name) {
+ len = scnprintf(buf, sz, "%s%s",
+ first ? "" : ".", child->var_name);
+ first = false;
+ } else {
+ len = 0;
+ }
+
+ return fill_member_name(buf + len, sz - len, child, offset, first) + len;
+ }
+ return 0;
+}
+
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+ char *buf, size_t sz, int member_offset)
+{
+ return fill_member_name(buf, sz, &adt->self, member_offset, /*first=*/true);
+}
+
static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
Dwarf_Die *type_die)
{
@@ -830,7 +864,7 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
continue;
- if (var->reg == DWARF_REG_FB || var->reg == fbreg) {
+ if (var->reg == DWARF_REG_FB || var->reg == fbreg || var->reg == state->stack_reg) {
int offset = var->offset;
struct type_state_stack *stack;
@@ -845,8 +879,13 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
findnew_stack_state(state, offset, TSR_KIND_TYPE,
&mem_die);
- pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
- insn_offset, -offset);
+ if (var->reg == state->stack_reg) {
+ pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)",
+ insn_offset, offset, state->stack_reg);
+ } else {
+ pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
+ insn_offset, -offset);
+ }
pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
} else if (has_reg_type(state, var->reg) && var->offset == 0) {
struct type_state_reg *reg;
@@ -1127,10 +1166,10 @@ again:
}
check_non_register:
- if (reg == dloc->fbreg) {
+ if (reg == dloc->fbreg || reg == state->stack_reg) {
struct type_state_stack *stack;
- pr_debug_dtp("fbreg");
+ pr_debug_dtp("%s", reg == dloc->fbreg ? "fbreg" : "stack");
stack = find_stack_state(state, dloc->type_offset);
if (stack == NULL) {
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 98c80b2268dd..541fee1a5f0a 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -227,8 +227,13 @@ void annotated_data_type__tree_delete(struct rb_root *root);
/* Release all global variable information in the tree */
void global_var_type__tree_delete(struct rb_root *root);
+/* Print data type annotation (including members) on stdout */
int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel);
+/* Get name of member field at the given offset in the data type */
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+ char *buf, size_t sz, int member_offset);
+
bool has_reg_type(struct type_state *state, int reg);
struct type_state_stack *findnew_stack_state(struct type_state *state,
int offset, u8 kind,
@@ -276,6 +281,14 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un
return -1;
}
+static inline int annotated_data_type__get_member_name(struct annotated_data_type *adt __maybe_unused,
+ char *buf __maybe_unused,
+ size_t sz __maybe_unused,
+ int member_offset __maybe_unused)
+{
+ return -1;
+}
+
#endif /* HAVE_LIBDW_SUPPORT */
#ifdef HAVE_SLANG_SUPPORT
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 31bb326b07a6..1e59b9e5339d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -87,6 +87,8 @@ struct annotated_data_type canary_type = {
},
};
+#define NO_TYPE ((struct annotated_data_type *)-1UL)
+
/* symbol histogram: key = offset << 16 | evsel->core.idx */
static size_t sym_hist_hash(long key, void *ctx __maybe_unused)
{
@@ -758,15 +760,31 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi
return 0;
}
+static struct annotated_data_type *
+__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
+ struct debuginfo *dbg, struct disasm_line *dl,
+ int *type_offset);
+
+struct annotation_print_data {
+ struct hist_entry *he;
+ struct evsel *evsel;
+ struct arch *arch;
+ struct debuginfo *dbg;
+ u64 start;
+ int addr_fmt_width;
+};
+
static int
-annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
- struct evsel *evsel, u64 len, int min_pcnt, int printed,
- int max_lines, struct annotation_line *queue, int addr_fmt_width,
- int percent_type)
+annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd,
+ struct annotation_options *opts, int printed,
+ struct annotation_line *queue)
{
+ struct symbol *sym = apd->he->ms.sym;
struct disasm_line *dl = container_of(al, struct disasm_line, al);
struct annotation *notes = symbol__annotation(sym);
static const char *prev_line;
+ int max_lines = opts->max_lines;
+ int percent_type = opts->percent_type;
if (al->offset != -1) {
double max_percent = 0.0;
@@ -786,19 +804,23 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (al->data_nr > nr_percent)
nr_percent = al->data_nr;
- if (max_percent < min_pcnt)
+ if (max_percent < opts->min_pcnt)
return -1;
if (max_lines && printed >= max_lines)
return 1;
if (queue != NULL) {
+ struct annotation_options queue_opts = {
+ .max_lines = 1,
+ .percent_type = percent_type,
+ };
+
list_for_each_entry_from(queue, &notes->src->source, node) {
if (queue == al)
break;
- annotation_line__print(queue, sym, start, evsel, len,
- 0, 0, 1, NULL, addr_fmt_width,
- percent_type);
+ annotation_line__print(queue, apd, &queue_opts,
+ /*printed=*/0, /*queue=*/NULL);
}
}
@@ -823,7 +845,31 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
printf(" : ");
- disasm_line__print(dl, start, addr_fmt_width);
+ disasm_line__print(dl, apd->start, apd->addr_fmt_width);
+
+ if (opts->code_with_type && apd->dbg) {
+ struct annotated_data_type *data_type;
+ int offset = 0;
+
+ data_type = __hist_entry__get_data_type(apd->he, apd->arch,
+ apd->dbg, dl, &offset);
+ if (data_type && data_type != NO_TYPE) {
+ char buf[4096];
+
+ printf("\t\t# data-type: %s",
+ data_type->self.type_name);
+
+ if (data_type != &stackop_type &&
+ data_type != &canary_type)
+ printf(" +%#x", offset);
+
+ if (annotated_data_type__get_member_name(data_type,
+ buf,
+ sizeof(buf),
+ offset))
+ printf(" (%s)", buf);
+ }
+ }
/*
* Also color the filename and line if needed, with
@@ -849,7 +895,8 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (!*al->line)
printf(" %*s:\n", width, " ");
else
- printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line);
+ printf(" %*s: %-*d %s\n", width, " ", apd->addr_fmt_width,
+ al->line_nr, al->line);
}
return 0;
@@ -1167,8 +1214,9 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
return 0;
}
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct map *map = ms->map;
struct symbol *sym = ms->sym;
struct dso *dso = map__dso(map);
@@ -1179,11 +1227,14 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
struct sym_hist *h = annotation__histogram(notes, evsel);
struct annotation_line *pos, *queue = NULL;
struct annotation_options *opts = &annotate_opts;
- u64 start = map__rip_2objdump(map, sym->start);
- int printed = 2, queue_len = 0, addr_fmt_width;
+ struct annotation_print_data apd = {
+ .he = he,
+ .evsel = evsel,
+ .start = map__rip_2objdump(map, sym->start),
+ };
+ int printed = 2, queue_len = 0;
int more = 0;
bool context = opts->context;
- u64 len;
int width = annotation__pcnt_width(notes);
int graph_dotted_len;
char buf[512];
@@ -1197,8 +1248,6 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
else
d_filename = basename(filename);
- len = symbol__size(sym);
-
if (evsel__is_group_event(evsel)) {
evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
@@ -1217,7 +1266,10 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
if (verbose > 0)
symbol__annotate_hits(sym, evsel);
- addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+ apd.addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source,
+ apd.start);
+ evsel__get_arch(evsel, &apd.arch);
+ apd.dbg = debuginfo__new(filename);
list_for_each_entry(pos, &notes->src->source, node) {
int err;
@@ -1227,9 +1279,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
queue_len = 0;
}
- err = annotation_line__print(pos, sym, start, evsel, len,
- opts->min_pcnt, printed, opts->max_lines,
- queue, addr_fmt_width, opts->percent_type);
+ err = annotation_line__print(pos, &apd, opts, printed, queue);
switch (err) {
case 0:
@@ -1260,6 +1310,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
}
}
+ debuginfo__delete(apd.dbg);
free(filename);
return more;
@@ -1597,8 +1648,9 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root)
annotation__calc_lines(notes, ms, root);
}
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct dso *dso = map__dso(ms->map);
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
@@ -1632,8 +1684,9 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
return 0;
}
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
+int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel)
{
+ struct map_symbol *ms = &he->ms;
struct dso *dso = map__dso(ms->map);
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
@@ -1657,7 +1710,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
print_summary(&source_line, dso__long_name(dso));
}
- symbol__annotate_printf(ms, evsel);
+ hist_entry__annotate_printf(he, evsel);
annotated_source__purge(symbol__annotation(sym)->src);
@@ -2643,6 +2696,92 @@ void debuginfo_cache__delete(void)
di_cache.dbg = NULL;
}
+static struct annotated_data_type *
+__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch,
+ struct debuginfo *dbg, struct disasm_line *dl,
+ int *type_offset)
+{
+ struct map_symbol *ms = &he->ms;
+ struct annotated_insn_loc loc;
+ struct annotated_op_loc *op_loc;
+ struct annotated_data_type *mem_type;
+ struct annotated_item_stat *istat;
+ int i;
+
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat == NULL) {
+ ann_data_stat.no_insn++;
+ return NO_TYPE;
+ }
+
+ if (annotate_get_insn_location(arch, dl, &loc) < 0) {
+ ann_data_stat.no_insn_ops++;
+ istat->bad++;
+ return NO_TYPE;
+ }
+
+ if (is_stack_operation(arch, dl)) {
+ istat->good++;
+ *type_offset = 0;
+ return &stackop_type;
+ }
+
+ for_each_insn_op_loc(&loc, i, op_loc) {
+ struct data_loc_info dloc = {
+ .arch = arch,
+ .thread = he->thread,
+ .ms = ms,
+ .ip = ms->sym->start + dl->al.offset,
+ .cpumode = he->cpumode,
+ .op = op_loc,
+ .di = dbg,
+ };
+
+ if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
+ continue;
+
+ /* PC-relative addressing */
+ if (op_loc->reg1 == DWARF_REG_PC) {
+ dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
+ op_loc->offset, dl);
+ }
+
+ /* This CPU access in kernel - pretend PC-relative addressing */
+ if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") &&
+ op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
+ dloc.var_addr = op_loc->offset;
+ op_loc->reg1 = DWARF_REG_PC;
+ }
+
+ mem_type = find_data_type(&dloc);
+
+ if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
+ istat->good++;
+ *type_offset = 0;
+ return &canary_type;
+ }
+
+ if (mem_type)
+ istat->good++;
+ else
+ istat->bad++;
+
+ if (symbol_conf.annotate_data_sample) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ annotated_data_type__update_samples(mem_type, evsel,
+ dloc.type_offset,
+ he->stat.nr_events,
+ he->stat.period);
+ }
+ *type_offset = dloc.type_offset;
+ return mem_type ?: NO_TYPE;
+ }
+
+ /* retry with a fused instruction */
+ return NULL;
+}
+
/**
* hist_entry__get_data_type - find data type for given hist entry
* @he: hist entry
@@ -2658,12 +2797,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
struct evsel *evsel = hists_to_evsel(he->hists);
struct arch *arch;
struct disasm_line *dl;
- struct annotated_insn_loc loc;
- struct annotated_op_loc *op_loc;
struct annotated_data_type *mem_type;
struct annotated_item_stat *istat;
u64 ip = he->ip;
- int i;
ann_data_stat.total++;
@@ -2715,77 +2851,10 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
}
retry:
- istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
- if (istat == NULL) {
- ann_data_stat.no_insn++;
- return NULL;
- }
-
- if (annotate_get_insn_location(arch, dl, &loc) < 0) {
- ann_data_stat.no_insn_ops++;
- istat->bad++;
- return NULL;
- }
-
- if (is_stack_operation(arch, dl)) {
- istat->good++;
- he->mem_type_off = 0;
- return &stackop_type;
- }
-
- for_each_insn_op_loc(&loc, i, op_loc) {
- struct data_loc_info dloc = {
- .arch = arch,
- .thread = he->thread,
- .ms = ms,
- /* Recalculate IP for LOCK prefix or insn fusion */
- .ip = ms->sym->start + dl->al.offset,
- .cpumode = he->cpumode,
- .op = op_loc,
- .di = di_cache.dbg,
- };
-
- if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
- continue;
-
- /* Recalculate IP because of LOCK prefix or insn fusion */
- ip = ms->sym->start + dl->al.offset;
-
- /* PC-relative addressing */
- if (op_loc->reg1 == DWARF_REG_PC) {
- dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
- op_loc->offset, dl);
- }
-
- /* This CPU access in kernel - pretend PC-relative addressing */
- if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") &&
- op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
- dloc.var_addr = op_loc->offset;
- op_loc->reg1 = DWARF_REG_PC;
- }
-
- mem_type = find_data_type(&dloc);
-
- if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
- istat->good++;
- he->mem_type_off = 0;
- return &canary_type;
- }
-
- if (mem_type)
- istat->good++;
- else
- istat->bad++;
-
- if (symbol_conf.annotate_data_sample) {
- annotated_data_type__update_samples(mem_type, evsel,
- dloc.type_offset,
- he->stat.nr_events,
- he->stat.period);
- }
- he->mem_type_off = dloc.type_offset;
- return mem_type;
- }
+ mem_type = __hist_entry__get_data_type(he, arch, di_cache.dbg, dl,
+ &he->mem_type_off);
+ if (mem_type)
+ return mem_type == NO_TYPE ? NULL : mem_type;
/*
* Some instructions can be fused and the actual memory access came
@@ -2805,7 +2874,9 @@ retry:
}
ann_data_stat.no_mem_ops++;
- istat->bad++;
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat)
+ istat->bad++;
return NULL;
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 98db1b88daf4..0e6e3f60a897 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -55,9 +55,11 @@ struct annotation_options {
show_asm_raw,
show_br_cntr,
annotate_src,
+ code_with_type,
full_addr;
u8 offset_level;
u8 disassemblers[MAX_DISASSEMBLERS];
+ u8 disassembler_used;
int min_pcnt;
int max_lines;
int context;
@@ -455,7 +457,6 @@ enum symbol_disassemble_errno {
int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel);
void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel);
void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel);
void annotated_source__purge(struct annotated_source *as);
@@ -464,9 +465,9 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel);
bool ui__has_annotation(void);
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel);
-
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel);
+int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel);
+int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel);
+int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel);
#ifdef HAVE_SLANG_SUPPORT
int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index ba807071d3c1..688fe6d75244 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -28,7 +28,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
/* Instruction virtual address or Branch target address */
if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
- index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH ||
+ index == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) {
ns = SPE_ADDR_PKT_GET_NS(payload);
el = SPE_ADDR_PKT_GET_EL(payload);
@@ -181,6 +182,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.virt_addr = ip;
else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
decoder->record.phys_addr = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH)
+ decoder->record.prev_br_tgt = ip;
break;
case ARM_SPE_COUNTER:
if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
@@ -207,6 +210,18 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
+ if (payload & SPE_OP_PKT_COND)
+ decoder->record.op |= ARM_SPE_OP_BR_COND;
+ if (payload & SPE_OP_PKT_INDIRECT_BRANCH)
+ decoder->record.op |= ARM_SPE_OP_BR_INDIRECT;
+ if (payload & SPE_OP_PKT_GCS)
+ decoder->record.op |= ARM_SPE_OP_BR_GCS;
+ if (SPE_OP_PKT_CR_BL(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_BL;
+ if (SPE_OP_PKT_CR_RET(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_RET;
+ if (SPE_OP_PKT_CR_NON_BL_RET(payload))
+ decoder->record.op |= ARM_SPE_OP_BR_CR_NON_BL_RET;
break;
default:
pr_err("Get packet error!\n");
@@ -238,6 +253,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
if (payload & BIT(EV_MISPRED))
decoder->record.type |= ARM_SPE_BRANCH_MISS;
+ if (payload & BIT(EV_NOT_TAKEN))
+ decoder->record.type |= ARM_SPE_BRANCH_NOT_TAKEN;
+
+ if (payload & BIT(EV_TRANSACTIONAL))
+ decoder->record.type |= ARM_SPE_IN_TXN;
+
if (payload & BIT(EV_PARTIAL_PREDICATE))
decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 4bcd627e859f..5d232188643b 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -24,6 +24,8 @@ enum arm_spe_sample_type {
ARM_SPE_REMOTE_ACCESS = 1 << 7,
ARM_SPE_SVE_PARTIAL_PRED = 1 << 8,
ARM_SPE_SVE_EMPTY_PRED = 1 << 9,
+ ARM_SPE_BRANCH_NOT_TAKEN = 1 << 10,
+ ARM_SPE_IN_TXN = 1 << 11,
};
enum arm_spe_op_type {
@@ -52,8 +54,12 @@ enum arm_spe_op_type {
ARM_SPE_OP_SVE_SG = 1 << 27,
/* Second level operation type for BRANCH_ERET */
- ARM_SPE_OP_BR_COND = 1 << 16,
- ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+ ARM_SPE_OP_BR_COND = 1 << 16,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 17,
+ ARM_SPE_OP_BR_GCS = 1 << 18,
+ ARM_SPE_OP_BR_CR_BL = 1 << 19,
+ ARM_SPE_OP_BR_CR_RET = 1 << 20,
+ ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
};
enum arm_spe_common_data_source {
@@ -83,6 +89,7 @@ struct arm_spe_record {
u32 latency;
u64 from_ip;
u64 to_ip;
+ u64 prev_br_tgt;
u64 timestamp;
u64 virt_addr;
u64 phys_addr;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 4cef10a83962..13cadb2f1cea 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -308,6 +308,8 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
if (payload & BIT(EV_ALIGNMENT))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+ if (payload & BIT(EV_TRANSACTIONAL))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " TXN");
if (payload & BIT(EV_PARTIAL_PREDICATE))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
if (payload & BIT(EV_EMPTY_PREDICATE))
@@ -397,10 +399,16 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
if (payload & SPE_OP_PKT_COND)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
-
- if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
+ if (payload & SPE_OP_PKT_INDIRECT_BRANCH)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
-
+ if (payload & SPE_OP_PKT_GCS)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS");
+ if (SPE_OP_PKT_CR_BL(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-BL");
+ if (SPE_OP_PKT_CR_RET(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-RET");
+ if (SPE_OP_PKT_CR_NON_BL_RET(payload))
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-NON-BL-RET");
break;
default:
/* Unknown index */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 464a912b221c..2cdf9f6da268 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -7,6 +7,7 @@
#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+#include <linux/bitfield.h>
#include <stddef.h>
#include <stdint.h>
@@ -104,6 +105,7 @@ enum arm_spe_events {
EV_LLC_MISS = 9,
EV_REMOTE_ACCESS = 10,
EV_ALIGNMENT = 11,
+ EV_TRANSACTIONAL = 16,
EV_PARTIAL_PREDICATE = 17,
EV_EMPTY_PREDICATE = 18,
};
@@ -116,8 +118,6 @@ enum arm_spe_events {
#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
-#define SPE_OP_PKT_COND BIT(0)
-
#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
@@ -148,7 +148,13 @@ enum arm_spe_events {
#define SPE_OP_PKT_SVE_PRED BIT(2)
#define SPE_OP_PKT_SVE_FP BIT(1)
-#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2)
+#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3)
+#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1)
+#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2)
+#define SPE_OP_PKT_CR_NON_BL_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 3)
+#define SPE_OP_PKT_GCS BIT(2)
+#define SPE_OP_PKT_INDIRECT_BRANCH BIT(1)
+#define SPE_OP_PKT_COND BIT(0)
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 12761c39788f..2a9775649cc2 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -37,6 +37,8 @@
#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)
+#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -101,6 +103,7 @@ struct arm_spe_queue {
struct thread *thread;
u64 period_instructions;
u32 flags;
+ struct branch_stack *last_branch;
};
struct data_source_handle {
@@ -231,6 +234,17 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
params.get_trace = arm_spe_get_trace;
params.data = speq;
+ if (spe->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ /* Allocate up to two entries for PBT + TGT */
+ sz += sizeof(struct branch_entry) *
+ min(spe->synth_opts.last_branch_sz, 2U);
+ speq->last_branch = zalloc(sz);
+ if (!speq->last_branch)
+ goto out_free;
+ }
+
/* create new decoder */
speq->decoder = arm_spe_decoder_new(&params);
if (!speq->decoder)
@@ -240,6 +254,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
out_free:
zfree(&speq->event_buf);
+ zfree(&speq->last_branch);
free(speq);
return NULL;
@@ -346,6 +361,88 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
event->sample.header.size = sizeof(struct perf_event_header);
}
+static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ struct branch_stack *bstack = speq->last_branch;
+ struct branch_flags *bs_flags;
+ unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
+ bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
+ bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
+ size_t sz = sizeof(struct branch_stack) +
+ sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
+ int i = 0;
+
+ /* Clean up branch stack */
+ memset(bstack, 0x0, sz);
+
+ if (!have_tgt && !have_pbt)
+ return;
+
+ if (have_tgt) {
+ bstack->entries[i].from = record->from_ip;
+ bstack->entries[i].to = record->to_ip;
+
+ bs_flags = &bstack->entries[i].flags;
+ bs_flags->value = 0;
+
+ if (record->op & ARM_SPE_OP_BR_CR_BL) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND_CALL;
+ else
+ bs_flags->type |= PERF_BR_CALL;
+ /*
+ * Indirect branch instruction without link (e.g. BR),
+ * take this case as function return.
+ */
+ } else if (record->op & ARM_SPE_OP_BR_CR_RET ||
+ record->op & ARM_SPE_OP_BR_INDIRECT) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND_RET;
+ else
+ bs_flags->type |= PERF_BR_RET;
+ } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND;
+ else
+ bs_flags->type |= PERF_BR_UNCOND;
+ } else {
+ if (record->op & ARM_SPE_OP_BR_COND)
+ bs_flags->type |= PERF_BR_COND;
+ else
+ bs_flags->type |= PERF_BR_UNKNOWN;
+ }
+
+ if (record->type & ARM_SPE_BRANCH_MISS) {
+ bs_flags->mispred = 1;
+ bs_flags->predicted = 0;
+ } else {
+ bs_flags->mispred = 0;
+ bs_flags->predicted = 1;
+ }
+
+ if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
+ bs_flags->not_taken = 1;
+
+ if (record->type & ARM_SPE_IN_TXN)
+ bs_flags->in_tx = 1;
+
+ bs_flags->cycles = min(record->latency, 0xFFFFU);
+ i++;
+ }
+
+ if (have_pbt) {
+ bs_flags = &bstack->entries[i].flags;
+ bs_flags->type |= PERF_BR_UNKNOWN;
+ bstack->entries[i].to = record->prev_br_tgt;
+ i++;
+ }
+
+ bstack->nr = i;
+ bstack->hw_idx = -1ULL;
+}
+
static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
event->header.size = perf_event__sample_event_size(sample, type, 0);
@@ -379,8 +476,10 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -390,7 +489,9 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.data_src = data_src;
sample.weight = record->latency;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
@@ -399,8 +500,10 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -408,8 +511,11 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.addr = record->to_ip;
sample.weight = record->latency;
sample.flags = speq->flags;
+ sample.branch_stack = speq->last_branch;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
@@ -418,7 +524,8 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
/*
* Handles perf instruction sampling period.
@@ -428,6 +535,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
return 0;
speq->period_instructions = 0;
+ perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
@@ -438,8 +546,11 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
sample.period = spe->instructions_sample_period;
sample.weight = record->latency;
sample.flags = speq->flags;
+ sample.branch_stack = speq->last_branch;
- return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ perf_sample__exit(&sample);
+ return ret;
}
static const struct midr_range common_ds_encoding_cpus[] = {
@@ -470,6 +581,26 @@ static void arm_spe__sample_flags(struct arm_spe_queue *speq)
if (record->type & ARM_SPE_BRANCH_MISS)
speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
+
+ if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
+ speq->flags |= PERF_IP_FLAG_NOT_TAKEN;
+
+ if (record->type & ARM_SPE_IN_TXN)
+ speq->flags |= PERF_IP_FLAG_IN_TX;
+
+ if (record->op & ARM_SPE_OP_BR_COND)
+ speq->flags |= PERF_IP_FLAG_CONDITIONAL;
+
+ if (record->op & ARM_SPE_OP_BR_CR_BL)
+ speq->flags |= PERF_IP_FLAG_CALL;
+ else if (record->op & ARM_SPE_OP_BR_CR_RET)
+ speq->flags |= PERF_IP_FLAG_RETURN;
+ /*
+ * Indirect branch instruction without link (e.g. BR),
+ * take it as a function return.
+ */
+ else if (record->op & ARM_SPE_OP_BR_INDIRECT)
+ speq->flags |= PERF_IP_FLAG_RETURN;
}
}
@@ -669,6 +800,10 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
+ /* Only synthesize data source for LDST operations */
+ if (!is_ldst_op(record->op))
+ return 0;
+
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
else if (record->op & ARM_SPE_OP_ST)
@@ -749,6 +884,10 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
}
}
+ if (spe->synth_opts.last_branch &&
+ (spe->sample_branch || spe->sample_instructions))
+ arm_spe__prep_branch_stack(speq);
+
if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
err = arm_spe__synth_branch_sample(speq, spe->branch_id);
if (err)
@@ -767,7 +906,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
* When data_src is zero it means the record is not a memory operation,
* skip to synthesize memory sample for this case.
*/
- if (spe->sample_memory && data_src) {
+ if (spe->sample_memory && is_ldst_op(record->op)) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
@@ -1240,6 +1379,7 @@ static void arm_spe_free_queue(void *priv)
thread__zput(speq->thread);
arm_spe_decoder_free(speq->decoder);
zfree(&speq->event_buf);
+ zfree(&speq->last_branch);
free(speq);
}
@@ -1459,6 +1599,19 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
id += 1;
}
+ if (spe->synth_opts.last_branch) {
+ if (spe->synth_opts.last_branch_sz > 2)
+ pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");
+
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
+
if (spe->synth_opts.branches) {
spe->sample_branch = true;
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
index 4940be4a0569..958afe8b821e 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.c
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -4,6 +4,7 @@
#include "event.h"
#include "perf_regs.h" // SMPL_REG_MASK
#include "unwind.h"
+#include <string.h>
#define perf_event_arm_regs perf_event_arm64_regs
#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
@@ -16,8 +17,13 @@ struct entries {
static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
{
- return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
- && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+ struct regs_dump *regs;
+
+ if (callchain_param.record_mode != CALLCHAIN_FP)
+ return false;
+
+ regs = perf_sample__user_regs(sample);
+ return regs->regs && regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
}
static int add_entry(struct unwind_entry *entry, void *arg)
@@ -32,7 +38,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
{
int ret;
struct entries entries = {};
- struct regs_dump old_regs = sample->user_regs;
+ struct regs_dump old_regs, *regs;
if (!get_leaf_frame_caller_enabled(sample))
return 0;
@@ -42,19 +48,20 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
* and set its mask. SP is not used when doing the unwinding but it
* still needs to be set to prevent failures.
*/
-
- if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
- sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
- sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ regs = perf_sample__user_regs(sample);
+ memcpy(&old_regs, regs, sizeof(*regs));
+ if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ regs->cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
}
- if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
- sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
- sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ regs->cache_regs[PERF_REG_ARM64_SP] = 0;
}
ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
- sample->user_regs = old_regs;
+ memcpy(regs, &old_regs, sizeof(*regs));
if (ret || entries.length != 2)
return ret;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 4d1633d87eff..03211c2623de 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1173,16 +1173,19 @@ static int auxtrace_queue_data_cb(struct perf_session *session,
if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
return 0;
+ perf_sample__init(&sample, /*all=*/false);
err = evlist__parse_sample(session->evlist, event, &sample);
if (err)
- return err;
-
- if (!sample.aux_sample.size)
- return 0;
+ goto out;
- offset += sample.aux_sample.data - (void *)event;
+ if (sample.aux_sample.size) {
+ offset += sample.aux_sample.data - (void *)event;
- return session->auxtrace->queue_data(session, &sample, NULL, offset);
+ err = session->auxtrace->queue_data(session, &sample, NULL, offset);
+ }
+out:
+ perf_sample__exit(&sample);
+ return err;
}
int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l
index f313404f95a9..6aa65ade3385 100644
--- a/tools/perf/util/bpf-filter.l
+++ b/tools/perf/util/bpf-filter.l
@@ -76,7 +76,7 @@ static int path_or_error(void)
num_dec [0-9]+
num_hex 0[Xx][0-9a-fA-F]+
space [ \t]+
-path [^ \t\n]+
+path [^ \t\n,]+
ident [_a-zA-Z][_a-zA-Z0-9]+
%%
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 25fc280e414a..7324668cc83e 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -39,6 +39,10 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
skel->rodata->bucket_range = ftrace->bucket_range;
skel->rodata->min_latency = ftrace->min_latency;
+ skel->rodata->bucket_num = ftrace->bucket_num;
+ if (ftrace->bucket_range && ftrace->bucket_num) {
+ bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
+ }
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
@@ -124,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
return 0;
}
-int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
int buckets[], struct stats *stats)
{
int i, fd, err;
@@ -138,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
if (hist == NULL)
return -ENOMEM;
- for (idx = 0; idx < NUM_BUCKET; idx++) {
+ for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
err = bpf_map_lookup_elem(fd, &idx, hist);
if (err) {
buckets[idx] = 0;
@@ -154,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
stats->n = skel->bss->count;
stats->max = skel->bss->max;
stats->min = skel->bss->min;
+
+ if (!ftrace->use_nsec) {
+ stats->mean /= 1000;
+ stats->max /= 1000;
+ stats->min /= 1000;
+ }
}
free(hist);
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index fc8666222399..5af8f6d1bc95 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -131,10 +131,20 @@ int lock_contention_prepare(struct lock_contention *con)
else
bpf_map__set_max_entries(skel->maps.task_data, 1);
- if (con->save_callstack)
+ if (con->save_callstack) {
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
- else
+ if (con->owner) {
+ bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
+ bpf_map__set_key_size(skel->maps.owner_stacks,
+ con->max_stack * sizeof(u64));
+ bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
+ skel->rodata->max_stack = con->max_stack;
+ }
+ } else {
bpf_map__set_max_entries(skel->maps.stacks, 1);
+ }
if (target__has_cpu(target)) {
skel->rodata->has_cpu = 1;
@@ -450,7 +460,6 @@ static const char *lock_contention_get_name(struct lock_contention *con,
{
int idx = 0;
u64 addr;
- const char *name = "";
static char name_buf[KSYM_NAME_LEN];
struct symbol *sym;
struct map *kmap;
@@ -465,13 +474,14 @@ static const char *lock_contention_get_name(struct lock_contention *con,
if (pid) {
struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);
- if (t == NULL)
- return name;
- if (!bpf_map_lookup_elem(task_fd, &pid, &task) &&
- thread__set_comm(t, task.comm, /*timestamp=*/0))
- name = task.comm;
+ if (t != NULL &&
+ !bpf_map_lookup_elem(task_fd, &pid, &task) &&
+ thread__set_comm(t, task.comm, /*timestamp=*/0)) {
+ snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
+ return name_buf;
+ }
}
- return name;
+ return "";
}
if (con->aggr_mode == LOCK_AGGR_ADDR) {
@@ -539,6 +549,63 @@ static const char *lock_contention_get_name(struct lock_contention *con,
return name_buf;
}
+struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
+{
+ int stacks_fd, stat_fd;
+ u64 *stack_trace = NULL;
+ s32 stack_id;
+ struct contention_key ckey = {};
+ struct contention_data cdata = {};
+ size_t stack_size = con->max_stack * sizeof(*stack_trace);
+ struct lock_stat *st = NULL;
+
+ stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
+ stat_fd = bpf_map__fd(skel->maps.owner_stat);
+ if (!stacks_fd || !stat_fd)
+ goto out_err;
+
+ stack_trace = zalloc(stack_size);
+ if (stack_trace == NULL)
+ goto out_err;
+
+ if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
+ goto out_err;
+
+ bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
+ ckey.stack_id = stack_id;
+ bpf_map_lookup_elem(stat_fd, &ckey, &cdata);
+
+ st = zalloc(sizeof(struct lock_stat));
+ if (!st)
+ goto out_err;
+
+ st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
+ "unknown");
+ if (!st->name)
+ goto out_err;
+
+ st->flags = cdata.flags;
+ st->nr_contended = cdata.count;
+ st->wait_time_total = cdata.total_time;
+ st->wait_time_max = cdata.max_time;
+ st->wait_time_min = cdata.min_time;
+ st->callstack = stack_trace;
+
+ if (cdata.count)
+ st->avg_wait_time = cdata.total_time / cdata.count;
+
+ bpf_map_delete_elem(stacks_fd, stack_trace);
+ bpf_map_delete_elem(stat_fd, &ckey);
+
+ return st;
+
+out_err:
+ free(stack_trace);
+ free(st);
+
+ return NULL;
+}
+
int lock_contention_read(struct lock_contention *con)
{
int fd, stack, err = 0;
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index fb144811b34f..e731a79a753a 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -50,6 +50,7 @@ const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;
const volatile unsigned int min_latency;
const volatile unsigned int max_latency;
+const volatile unsigned int bucket_num = NUM_BUCKET;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -101,6 +102,7 @@ int BPF_PROG(func_end)
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
__s64 delta = bpf_ktime_get_ns() - *start;
+ __u64 val = delta;
__u32 key = 0;
__u64 *hist;
@@ -110,30 +112,27 @@ int BPF_PROG(func_end)
return 0;
if (bucket_range != 0) {
- delta /= cmp_base;
+ val = delta / cmp_base;
if (min_latency > 0) {
- if (delta > min_latency)
- delta -= min_latency;
+ if (val > min_latency)
+ val -= min_latency;
else
goto do_lookup;
}
// Less than 1 unit (ms or ns), or, in the future,
// than the min latency desired.
- if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
- // clang 12 doesn't like s64 / u32 division
- key = (__u64)delta / bucket_range + 1;
- if (key >= NUM_BUCKET ||
- delta >= max_latency - min_latency)
- key = NUM_BUCKET - 1;
+ if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+ key = val / bucket_range + 1;
+ if (key >= bucket_num)
+ key = bucket_num - 1;
}
- delta += min_latency;
goto do_lookup;
}
// calculate index using delta
- for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ for (key = 0; key < (bucket_num - 1); key++) {
if (delta < (cmp_base << key))
break;
}
@@ -143,12 +142,9 @@ do_lookup:
if (!hist)
return 0;
- *hist += 1;
+ __sync_fetch_and_add(hist, 1);
- if (bucket_range == 0)
- delta /= cmp_base;
-
- __sync_fetch_and_add(&total, delta);
+ __sync_fetch_and_add(&total, delta); // always in nsec
__sync_fetch_and_add(&count, 1);
if (delta > max)
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
index cbd79bc4b330..9ce9c8dddc4b 100644
--- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
@@ -80,7 +80,7 @@ static __always_inline int local_strncmp(const char *s1,
for (i = 0; i < sz; i++) {
ret = (unsigned char)s1[i] - (unsigned char)s2[i];
- if (ret || !s1[i] || !s2[i])
+ if (ret || !s1[i])
break;
}
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 6533ea9b044c..69be7a4234e0 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -27,6 +27,38 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");
+/* buffer for owner stacktrace */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 1);
+} stack_buf SEC(".maps");
+
+/* a map for tracing owner stacktrace to owner stack id */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64)); // owner stacktrace
+ __uint(value_size, sizeof(__s32)); // owner stack id
+ __uint(max_entries, 1);
+} owner_stacks SEC(".maps");
+
+/* a map for tracing lock address to owner data */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64)); // lock address
+ __uint(value_size, sizeof(struct owner_tracing_data));
+ __uint(max_entries, 1);
+} owner_data SEC(".maps");
+
+/* a map for contention_key (stores owner stack id) to contention data */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct contention_key));
+ __uint(value_size, sizeof(struct contention_data));
+ __uint(max_entries, 1);
+} owner_stat SEC(".maps");
+
/* maintain timestamp at the beginning of contention */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
@@ -143,6 +175,7 @@ const volatile int needs_callstack;
const volatile int stack_skip;
const volatile int lock_owner;
const volatile int use_cgroup_v2;
+const volatile int max_stack;
/* determine the key of lock stat */
const volatile int aggr_mode;
@@ -164,6 +197,9 @@ int data_fail;
int task_map_full;
int data_map_full;
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak;
+void bpf_task_release(struct task_struct *p) __ksym __weak;
+
static inline __u64 get_current_cgroup_id(void)
{
struct task_struct *task;
@@ -387,6 +423,61 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
return pelem;
}
+static inline s32 get_owner_stack_id(u64 *stacktrace)
+{
+ s32 *id, new_id;
+ static s64 id_gen = 1;
+
+ id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+ if (id)
+ return *id;
+
+ new_id = (s32)__sync_fetch_and_add(&id_gen, 1);
+
+ bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST);
+
+ id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+ if (id)
+ return *id;
+
+ return -1;
+}
+
+static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
+{
+ __sync_fetch_and_add(&data->total_time, duration);
+ __sync_fetch_and_add(&data->count, count);
+
+ /* FIXME: need atomic operations */
+ if (data->max_time < duration)
+ data->max_time = duration;
+ if (data->min_time > duration)
+ data->min_time = duration;
+}
+
+static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
+{
+ struct contention_key key = {
+ .stack_id = id,
+ .pid = 0,
+ .lock_addr_or_cgroup = 0,
+ };
+ struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key);
+
+ if (!data) {
+ struct contention_data first = {
+ .total_time = duration,
+ .max_time = duration,
+ .min_time = duration,
+ .count = 1,
+ .flags = flags,
+ };
+ bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST);
+ } else {
+ update_contention_data(data, duration, 1);
+ }
+}
+
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
@@ -404,6 +495,72 @@ int contention_begin(u64 *ctx)
pelem->flags = (__u32)ctx[1];
if (needs_callstack) {
+ u32 i = 0;
+ u32 id = 0;
+ int owner_pid;
+ u64 *buf;
+ struct task_struct *task;
+ struct owner_tracing_data *otdata;
+
+ if (!lock_owner)
+ goto skip_owner;
+
+ task = get_lock_owner(pelem->lock, pelem->flags);
+ if (!task)
+ goto skip_owner;
+
+ owner_pid = BPF_CORE_READ(task, pid);
+
+ buf = bpf_map_lookup_elem(&stack_buf, &i);
+ if (!buf)
+ goto skip_owner;
+ for (i = 0; i < max_stack; i++)
+ buf[i] = 0x0;
+
+ if (!bpf_task_from_pid)
+ goto skip_owner;
+
+ task = bpf_task_from_pid(owner_pid);
+ if (!task)
+ goto skip_owner;
+
+ bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
+ bpf_task_release(task);
+
+ otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+ id = get_owner_stack_id(buf);
+
+ /*
+ * Contention just happens, or corner case `lock` is owned by process not
+ * `owner_pid`. For the corner case we treat it as unexpected internal error and
+ * just ignore the previous tracing record.
+ */
+ if (!otdata || otdata->pid != owner_pid) {
+ struct owner_tracing_data first = {
+ .pid = owner_pid,
+ .timestamp = pelem->timestamp,
+ .count = 1,
+ .stack_id = id,
+ };
+ bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
+ }
+ /* Contention is ongoing and new waiter joins */
+ else {
+ __sync_fetch_and_add(&otdata->count, 1);
+
+ /*
+ * The owner is the same, but stacktrace might be changed. In this case we
+ * store/update `owner_stat` based on current owner stack id.
+ */
+ if (id != otdata->stack_id) {
+ update_owner_stat(id, pelem->timestamp - otdata->timestamp,
+ pelem->flags);
+
+ otdata->timestamp = pelem->timestamp;
+ otdata->stack_id = id;
+ }
+ }
+skip_owner:
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
@@ -440,6 +597,7 @@ int contention_end(u64 *ctx)
struct tstamp_data *pelem;
struct contention_key key = {};
struct contention_data *data;
+ __u64 timestamp;
__u64 duration;
bool need_delete = false;
@@ -467,12 +625,88 @@ int contention_end(u64 *ctx)
need_delete = true;
}
- duration = bpf_ktime_get_ns() - pelem->timestamp;
+ timestamp = bpf_ktime_get_ns();
+ duration = timestamp - pelem->timestamp;
if ((__s64)duration < 0) {
__sync_fetch_and_add(&time_fail, 1);
goto out;
}
+ if (needs_callstack && lock_owner) {
+ struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+
+ if (!otdata)
+ goto skip_owner;
+
+ /* Update `owner_stat` */
+ update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags);
+
+ /* No contention is occurring, delete `lock` entry in `owner_data` */
+ if (otdata->count <= 1)
+ bpf_map_delete_elem(&owner_data, &pelem->lock);
+ /*
+ * Contention is still ongoing, with a new owner (current task). `owner_data`
+ * should be updated accordingly.
+ */
+ else {
+ u32 i = 0;
+ s32 ret = (s32)ctx[1];
+ u64 *buf;
+
+ otdata->timestamp = timestamp;
+ __sync_fetch_and_add(&otdata->count, -1);
+
+ buf = bpf_map_lookup_elem(&stack_buf, &i);
+ if (!buf)
+ goto skip_owner;
+ for (i = 0; i < (u32)max_stack; i++)
+ buf[i] = 0x0;
+
+ /*
+ * `ret` has the return code of the lock function.
+ * If `ret` is negative, the current task terminates lock waiting without
+ * acquiring it. Owner is not changed, but we still need to update the owner
+ * stack.
+ */
+ if (ret < 0) {
+ s32 id = 0;
+ struct task_struct *task;
+
+ if (!bpf_task_from_pid)
+ goto skip_owner;
+
+ task = bpf_task_from_pid(otdata->pid);
+ if (!task)
+ goto skip_owner;
+
+ bpf_get_task_stack(task, buf,
+ max_stack * sizeof(unsigned long), 0);
+ bpf_task_release(task);
+
+ id = get_owner_stack_id(buf);
+
+ /*
+ * If owner stack is changed, update owner stack id for this lock.
+ */
+ if (id != otdata->stack_id)
+ otdata->stack_id = id;
+ }
+ /*
+ * Otherwise, update tracing data with the current task, which is the new
+ * owner.
+ */
+ else {
+ otdata->pid = pid;
+ /*
+ * We don't want to retrieve callstack here, since it is where the
+ * current task acquires the lock and provides no additional
+ * information. We simply assign -1 to invalidate it.
+ */
+ otdata->stack_id = -1;
+ }
+ }
+ }
+skip_owner:
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
key.stack_id = pelem->stack_id;
@@ -556,14 +790,7 @@ int contention_end(u64 *ctx)
}
found:
- __sync_fetch_and_add(&data->total_time, duration);
- __sync_fetch_and_add(&data->count, 1);
-
- /* FIXME: need atomic operations */
- if (data->max_time < duration)
- data->max_time = duration;
- if (data->min_time > duration)
- data->min_time = duration;
+ update_contention_data(data, duration, 1);
out:
pelem->lock = 0;
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index c15f734d7fc4..15f5743bd409 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -3,6 +3,13 @@
#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
#define UTIL_BPF_SKEL_LOCK_DATA_H
+struct owner_tracing_data {
+ u32 pid; // PID of the task that currently holds the lock.
+ u32 count; // Number of waiters currently contending for this lock.
+ u64 timestamp; // Start of the time span not yet accounted to the owner in `owner_stat`.
+ s32 stack_id; // Owner stack id for `owner_stat` (the value stored in `owner_stacks`); -1 if invalidated.
+};
+
struct tstamp_data {
u64 timestamp;
u64 lock;
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index b80c12c74bbb..7429530fa774 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -25,7 +25,8 @@ struct branch_flags {
u64 spec:2;
u64 new_type:4;
u64 priv:3;
- u64 reserved:31;
+ u64 not_taken:1;
+ u64 reserved:30;
};
};
};
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 0c7564747a14..d7b7eef740b9 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -589,9 +589,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
return -ENOMEM;
}
call->ip = cursor_node->ip;
- call->ms = cursor_node->ms;
- call->ms.map = map__get(call->ms.map);
- call->ms.maps = maps__get(call->ms.maps);
+ map_symbol__copy(&call->ms, &cursor_node->ms);
call->srcline = cursor_node->srcline;
if (cursor_node->branch) {
@@ -1094,9 +1092,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->ip = ip;
map_symbol__exit(&node->ms);
- node->ms = *ms;
- node->ms.maps = maps__get(ms->maps);
- node->ms.map = map__get(ms->map);
+ map_symbol__copy(&node->ms, ms);
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->iter_cycles = iter_cycles;
@@ -1564,7 +1560,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
goto out;
*new = *chain;
new->has_children = false;
- new->ms.map = map__get(new->ms.map);
+ map_symbol__copy(&new->ms, &chain->ms);
list_add_tail(&new->list, &head);
}
parent = parent->parent;
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 9a7248dbe2d7..0319546decca 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -30,11 +30,6 @@
extern int perf_use_color_default;
-/*
- * Use this instead of perf_default_config if you need the value of color.ui.
- */
-int perf_color_default_config(const char *var, const char *value, void *cb);
-
int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
int color_vsnprintf(char *bf, size_t size, const char *color,
const char *fmt, va_list args);
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
index dc09ba7cb31e..301031ddc025 100644
--- a/tools/perf/util/color_config.c
+++ b/tools/perf/util/color_config.c
@@ -35,14 +35,3 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
}
return 0;
}
-
-int perf_color_default_config(const char *var, const char *value,
- void *cb __maybe_unused)
-{
- if (!strcmp(var, "color.ui")) {
- perf_use_color_default = perf_config_colorbool(var, value, -1);
- return 0;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 49b79cf0c5cc..8aa456d7c2cd 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -5,6 +5,8 @@
#include <internal/rc_check.h>
#include <linux/refcount.h>
#include <linux/zalloc.h>
+#include <tools/libc_compat.h> // reallocarray
+
#include "rwsem.h"
DECLARE_RC_STRUCT(comm_str) {
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index b29109cd3609..6cfecfca16f2 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -4,7 +4,9 @@
#include <stdbool.h>
#include <stddef.h>
+#include <stdio.h>
#include <sys/types.h>
+#include <linux/compiler.h>
#ifdef HAVE_ZSTD_SUPPORT
#include <zstd.h>
#endif
@@ -15,8 +17,26 @@ bool gzip_is_compressed(const char *input);
#endif
#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_stream_to_file(FILE *input, int output_fd);
int lzma_decompress_to_file(const char *input, int output_fd);
bool lzma_is_compressed(const char *input);
+#else
+static inline
+int lzma_decompress_stream_to_file(FILE *input __maybe_unused,
+ int output_fd __maybe_unused)
+{
+ return -1;
+}
+static inline
+int lzma_decompress_to_file(const char *input __maybe_unused,
+ int output_fd __maybe_unused)
+{
+ return -1;
+}
+static inline bool lzma_is_compressed(const char *input __maybe_unused)
+{
+ return false; /* built without LZMA support, so no input is ever reported as LZMA-compressed */
+}
#endif
struct zstd_data {
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 2d07c9257a1a..ae72b66b6ded 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -856,12 +856,6 @@ void perf_config__exit(void)
config_set = NULL;
}
-void perf_config__refresh(void)
-{
- perf_config__exit();
- perf_config__init();
-}
-
static void perf_config_item__delete(struct perf_config_item *item)
{
zfree(&item->name);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index a727c95cb119..987b47cf54c3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -49,7 +49,6 @@ void perf_config_set__delete(struct perf_config_set *set);
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
const char *var, const char *value);
void perf_config__exit(void);
-void perf_config__refresh(void);
int perf_config__set_variable(const char *var, const char *value);
/**
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 5c329ad614e9..89570397a4b3 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m
struct perf_cpu_map *map;
map = perf_cpu_map__empty_new(data->cpus_data.nr);
- if (map) {
- unsigned i;
-
- for (i = 0; i < data->cpus_data.nr; i++) {
- /*
- * Special treatment for -1, which is not real cpu number,
- * and we need to use (int) -1 to initialize map[i],
- * otherwise it would become 65535.
- */
- if (data->cpus_data.cpu[i] == (u16) -1)
- RC_CHK_ACCESS(map)->map[i].cpu = -1;
- else
- RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
+ if (!map)
+ return NULL;
+
+ for (unsigned int i = 0; i < data->cpus_data.nr; i++) {
+ /*
+ * Special treatment for -1, which is not real cpu number,
+ * and we need to use (int) -1 to initialize map[i],
+ * otherwise it would become 65535.
+ */
+ if (data->cpus_data.cpu[i] == (u16) -1) {
+ RC_CHK_ACCESS(map)->map[i].cpu = -1;
+ } else if (data->cpus_data.cpu[i] < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i];
+ } else {
+ pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]);
+ perf_cpu_map__put(map);
+ return NULL;
}
}
@@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
int cpu;
perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
- for_each_set_bit(cpu, local_copy, 64)
- RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ for_each_set_bit(cpu, local_copy, 64) {
+ if (cpu + cpus_per_i < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
}
return map;
@@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map
RC_CHK_ACCESS(map)->map[i++].cpu = -1;
for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
- i++, cpu++)
- RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ i++, cpu++) {
+ if (cpu < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
return map;
}
@@ -427,7 +445,7 @@ static void set_max_cpu_num(void)
{
const char *mnt;
char path[PATH_MAX];
- int ret = -1;
+ int max, ret = -1;
/* set up default */
max_cpu_num.cpu = 4096;
@@ -444,10 +462,12 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num.cpu);
+ ret = get_max_num(path, &max);
if (ret)
goto out;
+ max_cpu_num.cpu = max;
+
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
if (ret >= PATH_MAX) {
@@ -455,8 +475,14 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num.cpu);
+ ret = get_max_num(path, &max);
+ if (!ret && max > INT16_MAX) {
+ pr_err("Read out of bounds max cpus of %d\n", max);
+ ret = -1;
+ }
+ if (!ret)
+ max_present_cpu_num.cpu = (int16_t)max;
out:
if (ret)
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
#define COMMA first ? "" : ","
for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
- struct perf_cpu cpu = { .cpu = INT_MAX };
+ struct perf_cpu cpu = { .cpu = INT16_MAX };
bool last = i == perf_cpu_map__nr(map);
if (!last)
@@ -696,7 +722,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
if (!online)
online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */
- return online;
+ return perf_cpu_map__get(online);
}
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 0bf9e5c27b59..30f4bb3e7fa3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -506,20 +506,27 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
+ perf_sample__init(&sample, /*all=*/false);
err = evsel__parse_sample(evsel, event, &sample);
if (err)
- return err;
+ goto out;
cpu = sample.cpu;
if (cpu == -1) {
/* no CPU in the sample - possibly recorded with an old version of perf */
pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
- if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
- return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
+ err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ goto out;
+ }
- return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+ err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+out:
+ perf_sample__exit(&sample);
+ return err;
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -1560,8 +1567,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
union perf_event *event = tidq->event_buf;
- struct perf_sample sample = {.ip = 0,};
+ struct perf_sample sample;
+ perf_sample__init(&sample, /*all=*/true);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1598,6 +1606,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
"CS ETM Trace: failed to deliver instruction event, error %d\n",
ret);
+ perf_sample__exit(&sample);
return ret;
}
@@ -3151,9 +3160,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
+ perf_sample__init(&sample, /*all=*/false);
ret = evsel__parse_sample(evsel, event, &sample);
if (ret)
- return ret;
+ goto out;
/*
* Loop through the auxtrace index to find the buffer that matches up with this aux event.
@@ -3168,7 +3178,7 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
* 1 ('not found')
*/
if (ret != 1)
- return ret;
+ goto out;
}
}
@@ -3178,7 +3188,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
*/
pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
" tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
- return 0;
+ ret = 0;
+out:
+ perf_sample__exit(&sample);
+ return ret;
}
static int cs_etm__queue_aux_records(struct perf_session *session)
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 98661ede2a73..164eb45a0b36 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -158,26 +158,6 @@ out_err:
return ret;
}
-int perf_data__update_dir(struct perf_data *data)
-{
- int i;
-
- if (WARN_ON(!data->is_dir))
- return -EINVAL;
-
- for (i = 0; i < data->dir.nr; i++) {
- struct perf_data_file *file = &data->dir.files[i];
- struct stat st;
-
- if (fstat(file->fd, &st))
- return -1;
-
- file->size = st.st_size;
- }
-
- return 0;
-}
-
static bool check_pipe(struct perf_data *data)
{
struct stat st;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 110f3ebde30f..1438e32e0451 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -97,7 +97,6 @@ int perf_data__switch(struct perf_data *data,
int perf_data__create_dir(struct perf_data *data, int nr);
int perf_data__open_dir(struct perf_data *data);
void perf_data__close_dir(struct perf_data *data);
-int perf_data__update_dir(struct perf_data *data);
unsigned long perf_data__size(struct perf_data *data);
int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
bool has_kcore_dir(const char *path);
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 995f6bb05b5f..f9ef7d045c92 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,8 +46,8 @@ int debug_type_profile;
FILE *debug_file(void)
{
if (!_debug_file) {
- pr_warning_once("debug_file not set");
debug_set_file(stderr);
+ pr_warning_once("debug_file not set");
}
return _debug_file;
}
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
index 19acf4775d35..b5deea7cbdf2 100644
--- a/tools/perf/util/debuginfo.c
+++ b/tools/perf/util/debuginfo.c
@@ -125,8 +125,12 @@ struct debuginfo *debuginfo__new(const char *path)
dso__put(dso);
out:
+ if (dinfo)
+ return dinfo;
+
/* if failed to open all distro debuginfo, open given binary */
- return dinfo ? : __debuginfo__new(path);
+ symbol__join_symfs(buf, path);
+ return __debuginfo__new(buf);
}
void debuginfo__delete(struct debuginfo *dbg)
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 50c5c206b70e..8f0eb56c6fc6 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -48,7 +48,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
-static int disasm_line__parse_powerpc(struct disasm_line *dl);
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args);
static char *expand_tabs(char *line, char **storage, size_t *storage_len);
static __attribute__((constructor)) void symbol__init_regexpr(void)
@@ -968,24 +968,25 @@ out:
#define PPC_OP(op) (((op) >> 26) & 0x3F)
#define RAW_BYTES 11
-static int disasm_line__parse_powerpc(struct disasm_line *dl)
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args)
{
char *line = dl->al.line;
const char **namep = &dl->ins.name;
char **rawp = &dl->ops.raw;
char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
char *name = skip_spaces(name_raw_insn + RAW_BYTES);
- int objdump = 0;
+ int disasm = 0;
+ int ret = 0;
- if (strlen(line) > RAW_BYTES)
- objdump = 1;
+ if (args->options->disassembler_used)
+ disasm = 1;
if (name_raw_insn[0] == '\0')
return -1;
- if (objdump) {
- disasm_line__parse(name, namep, rawp);
- } else
+ if (disasm)
+ ret = disasm_line__parse(name, namep, rawp);
+ else
*namep = "";
tmp_raw_insn = strndup(name_raw_insn, 11);
@@ -995,10 +996,10 @@ static int disasm_line__parse_powerpc(struct disasm_line *dl)
remove_spaces(tmp_raw_insn);
sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
- if (objdump)
+ if (disasm)
dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
- return 0;
+ return ret;
}
static void annotation_line__init(struct annotation_line *al,
@@ -1054,7 +1055,7 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
if (args->offset != -1) {
if (arch__is(args->arch, "powerpc")) {
- if (disasm_line__parse_powerpc(dl) < 0)
+ if (disasm_line__parse_powerpc(dl, args) < 0)
goto out_free_line;
} else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
goto out_free_line;
@@ -2289,16 +2290,20 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
switch (dis) {
case PERF_DISASM_LLVM:
+ args->options->disassembler_used = PERF_DISASM_LLVM;
err = symbol__disassemble_llvm(symfs_filename, sym, args);
break;
case PERF_DISASM_CAPSTONE:
+ args->options->disassembler_used = PERF_DISASM_CAPSTONE;
err = symbol__disassemble_capstone(symfs_filename, sym, args);
break;
case PERF_DISASM_OBJDUMP:
+ args->options->disassembler_used = PERF_DISASM_OBJDUMP;
err = symbol__disassemble_objdump(symfs_filename, sym, args);
break;
case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
default:
+ args->options->disassembler_used = PERF_DISASM_UNKNOWN;
goto out_remove_tmp;
}
if (err == 0)
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5c6e85fdae0d..8619b6eea62d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -67,6 +67,7 @@ char dso__symtab_origin(const struct dso *dso)
[DSO_BINARY_TYPE__GUEST_KMODULE] = 'G',
[DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M',
[DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V',
+ [DSO_BINARY_TYPE__GNU_DEBUGDATA] = 'n',
};
if (dso == NULL || dso__symtab_type(dso) == DSO_BINARY_TYPE__NOT_FOUND)
@@ -93,6 +94,7 @@ bool dso__is_object_file(const struct dso *dso)
case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
case DSO_BINARY_TYPE__GUEST_KMODULE:
case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
@@ -224,6 +226,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__VMLINUX:
case DSO_BINARY_TYPE__GUEST_VMLINUX:
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
__symbol__join_symfs(filename, size, dso__long_name(dso));
break;
@@ -490,11 +493,25 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
/*
* Global list of open DSOs and the counter.
*/
+struct mutex _dso__data_open_lock;
static LIST_HEAD(dso__data_open);
-static long dso__data_open_cnt;
-static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock);
-static void dso__list_add(struct dso *dso)
+static void dso__data_open_lock_init(void)
+{
+ mutex_init(&_dso__data_open_lock); /* invoked through pthread_once() in dso__data_open_lock() */
+}
+/* Accessor for the mutex annotated as guarding dso__data_open_cnt; sets it up on first call. */
+static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock)
+{
+ static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT;
+ /* pthread_once() runs the init routine a single time, however many callers arrive. */
+ pthread_once(&data_open_lock_once, dso__data_open_lock_init);
+
+ return &_dso__data_open_lock;
+}
+
+static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
list_add_tail(&dso__data(dso)->open_entry, &dso__data_open);
#ifdef REFCNT_CHECKING
@@ -505,11 +522,13 @@ static void dso__list_add(struct dso *dso)
dso__data_open_cnt++;
}
-static void dso__list_del(struct dso *dso)
+static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
list_del_init(&dso__data(dso)->open_entry);
#ifdef REFCNT_CHECKING
+ mutex_unlock(dso__data_open_lock());
dso__put(dso__data(dso)->dso);
+ mutex_lock(dso__data_open_lock());
#endif
WARN_ONCE(dso__data_open_cnt <= 0,
"DSO data fd counter out of bounds.");
@@ -518,7 +537,7 @@ static void dso__list_del(struct dso *dso)
static void close_first_dso(void);
-static int do_open(char *name)
+static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd;
char sbuf[STRERR_BUFSIZE];
@@ -545,6 +564,7 @@ char *dso__filename_with_chroot(const struct dso *dso, const char *filename)
}
static int __open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd = -EINVAL;
char *root_dir = (char *)"";
@@ -610,6 +630,7 @@ static void check_data_close(void);
* list/count of open DSO objects.
*/
static int open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
int fd;
struct nscookie nsc;
@@ -635,7 +656,7 @@ static int open_dso(struct dso *dso, struct machine *machine)
return fd;
}
-static void close_data_fd(struct dso *dso)
+static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
if (dso__data(dso)->fd >= 0) {
close(dso__data(dso)->fd);
@@ -652,12 +673,12 @@ static void close_data_fd(struct dso *dso)
* Close @dso's data file descriptor and updates
* list/count of open DSO objects.
*/
-static void close_dso(struct dso *dso)
+static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
close_data_fd(dso);
}
-static void close_first_dso(void)
+static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
struct dso_data *dso_data;
struct dso *dso;
@@ -702,7 +723,7 @@ void reset_fd_limit(void)
fd_limit = 0;
}
-static bool may_cache_fd(void)
+static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
if (!fd_limit)
fd_limit = get_fd_limit();
@@ -718,7 +739,7 @@ static bool may_cache_fd(void)
* for opened dso file descriptors. The limit is half
* of the RLIMIT_NOFILE files opened.
*/
-static void check_data_close(void)
+static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
bool cache_fd = may_cache_fd();
@@ -734,12 +755,13 @@ static void check_data_close(void)
*/
void dso__data_close(struct dso *dso)
{
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
close_dso(dso);
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
}
static void try_to_open_dso(struct dso *dso, struct machine *machine)
+ EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
{
enum dso_binary_type binary_type_data[] = {
DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -781,25 +803,27 @@ out:
* returns file descriptor. It should be paired with
* dso__data_put_fd() if it returns non-negative value.
*/
-int dso__data_get_fd(struct dso *dso, struct machine *machine)
+bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
{
+ *fd = -1;
if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR)
- return -1;
+ return false;
- if (pthread_mutex_lock(&dso__data_open_lock) < 0)
- return -1;
+ mutex_lock(dso__data_open_lock());
try_to_open_dso(dso, machine);
- if (dso__data(dso)->fd < 0)
- pthread_mutex_unlock(&dso__data_open_lock);
+ *fd = dso__data(dso)->fd;
+ if (*fd >= 0)
+ return true;
- return dso__data(dso)->fd;
+ mutex_unlock(dso__data_open_lock());
+ return false;
}
void dso__data_put_fd(struct dso *dso __maybe_unused)
{
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
}
bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
@@ -951,7 +975,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
{
ssize_t ret;
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
/*
* dso__data(dso)->fd might be closed if other thread opened another
@@ -967,7 +991,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset);
out:
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
return ret;
}
@@ -1075,7 +1099,7 @@ static int file_size(struct dso *dso, struct machine *machine)
struct stat st;
char sbuf[STRERR_BUFSIZE];
- pthread_mutex_lock(&dso__data_open_lock);
+ mutex_lock(dso__data_open_lock());
/*
* dso__data(dso)->fd might be closed if other thread opened another
@@ -1099,7 +1123,7 @@ static int file_size(struct dso *dso, struct machine *machine)
dso__data(dso)->file_size = st.st_size;
out:
- pthread_mutex_unlock(&dso__data_open_lock);
+ mutex_unlock(dso__data_open_lock());
return ret;
}
@@ -1170,6 +1194,68 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
return data_read_write_offset(dso, machine, offset, data, size, true);
}
+uint16_t dso__e_machine(struct dso *dso, struct machine *machine)
+{
+ uint16_t e_machine = EM_NONE;
+ int fd;
+ /* Return the ELF e_machine for @dso; only the middle case group below reads it from the file. */
+ switch (dso__binary_type(dso)) {
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ return EM_HOST; /* answered without opening or reading any file */
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ break;
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return EM_NONE;
+ }
+ /* The fd is opened and read with the open-DSO lock held; the lock is dropped before returning. */
+ mutex_lock(dso__data_open_lock());
+
+ /*
+ * dso__data(dso)->fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+ fd = dso__data(dso)->fd;
+ if (fd >= 0) {
+ _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset");
+ _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
+ if (dso__needs_swap(dso) == DSO_SWAP__UNSET) {
+ unsigned char eidata;
+
+ if (pread(fd, &eidata, sizeof(eidata), EI_DATA) == sizeof(eidata))
+ dso__swap_init(dso, eidata);
+ }
+ if (dso__needs_swap(dso) != DSO_SWAP__UNSET &&
+ pread(fd, &e_machine, sizeof(e_machine), 18) == sizeof(e_machine)) /* 18: e_machine offset asserted above */
+ e_machine = DSO__SWAP(dso, uint16_t, e_machine);
+ }
+ mutex_unlock(dso__data_open_lock());
+ return e_machine;
+}
+
/**
* dso__data_read_addr - Read data from dso address
* @dso: dso object
@@ -1525,6 +1611,33 @@ void dso__put(struct dso *dso)
RC_CHK_PUT(dso);
}
+int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+ static unsigned int const endian = 1;
+ /* Set @dso's needs-swap state from the ELF data encoding @eidata; the first byte of `endian` shows host order. */
+ dso__set_needs_swap(dso, DSO_SWAP__NO);
+ /* NOTE(review): the DSO_SWAP__NO set above is left in place when -EINVAL is returned below. */
+ switch (eidata) {
+ case ELFDATA2LSB:
+ /* We are big endian, DSO is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ dso__set_needs_swap(dso, DSO_SWAP__YES);
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, DSO is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ dso__set_needs_swap(dso, DSO_SWAP__YES);
+ break;
+
+ default:
+ pr_err("unrecognized DSO data encoding %d\n", eidata);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void dso__set_build_id(struct dso *dso, struct build_id *bid)
{
RC_CHK_ACCESS(dso)->bid = *bid;
@@ -1608,11 +1721,10 @@ size_t dso__fprintf(struct dso *dso, FILE *fp)
enum dso_type dso__type(struct dso *dso, struct machine *machine)
{
- int fd;
+ int fd = -1;
enum dso_type type = DSO__TYPE_UNKNOWN;
- fd = dso__data_get_fd(dso, machine);
- if (fd >= 0) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
type = dso__type_fd(fd);
dso__data_put_fd(dso);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index bb8e8f444054..c87564471f9b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -20,30 +20,88 @@ struct perf_env;
#define DSO__NAME_KALLSYMS "[kernel.kallsyms]"
#define DSO__NAME_KCORE "[kernel.kcore]"
+/**
+ * enum dso_binary_type - The kind of DSO generally associated with a memory
+ * region (struct map).
+ */
enum dso_binary_type {
+ /** @DSO_BINARY_TYPE__KALLSYMS: Symbols from /proc/kallsyms file. */
DSO_BINARY_TYPE__KALLSYMS = 0,
+ /** @DSO_BINARY_TYPE__GUEST_KALLSYMS: Guest /proc/kallsyms file. */
DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ /** @DSO_BINARY_TYPE__VMLINUX: Path to kernel /boot/vmlinux file. */
DSO_BINARY_TYPE__VMLINUX,
+ /** @DSO_BINARY_TYPE__GUEST_VMLINUX: Path to guest kernel /boot/vmlinux file. */
DSO_BINARY_TYPE__GUEST_VMLINUX,
+ /** @DSO_BINARY_TYPE__JAVA_JIT: Symbols from /tmp/perf.map file. */
DSO_BINARY_TYPE__JAVA_JIT,
+ /**
+ * @DSO_BINARY_TYPE__DEBUGLINK: Debug file readable from the file path
+ * in the .gnu_debuglink ELF section of the dso.
+ */
DSO_BINARY_TYPE__DEBUGLINK,
+ /**
+ * @DSO_BINARY_TYPE__BUILD_ID_CACHE: File named after buildid located in
+ * the buildid cache with an elf filename.
+ */
DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ /**
+ * @DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: File named after buildid
+ * located in the buildid cache with a debug filename.
+ */
DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__FEDORA_DEBUGINFO: Debug file in /usr/lib/debug
+ * with .debug suffix.
+ */
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ /** @DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: Debug file in /usr/lib/debug. */
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: dso__long_name debuginfo
+ * file in /usr/lib/debug/lib rather than the expected
+ * /usr/lib/debug/usr/lib.
+ */
DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__BUILDID_DEBUGINFO: File named after buildid located
+ * in /usr/lib/debug/.build-id/.
+ */
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ /**
+ * @DSO_BINARY_TYPE__GNU_DEBUGDATA: MiniDebuginfo where a compressed
+ * ELF file is placed in a .gnu_debugdata section.
+ */
+ DSO_BINARY_TYPE__GNU_DEBUGDATA,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_DSO: A regular executable/shared-object file. */
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ /** @DSO_BINARY_TYPE__GUEST_KMODULE: Guest kernel module .ko file. */
DSO_BINARY_TYPE__GUEST_KMODULE,
+ /** @DSO_BINARY_TYPE__GUEST_KMODULE_COMP: Guest kernel module .ko.gz file. */
DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: Kernel module .ko file. */
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: Kernel module .ko.gz file. */
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ /** @DSO_BINARY_TYPE__KCORE: /proc/kcore file. */
DSO_BINARY_TYPE__KCORE,
+ /** @DSO_BINARY_TYPE__GUEST_KCORE: Guest /proc/kcore file. */
DSO_BINARY_TYPE__GUEST_KCORE,
+ /**
+ * @DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: OpenEmbedded/Yocto -dbg
+ * package debug info.
+ */
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ /** @DSO_BINARY_TYPE__BPF_PROG_INFO: jitted BPF code. */
DSO_BINARY_TYPE__BPF_PROG_INFO,
+ /** @DSO_BINARY_TYPE__BPF_IMAGE: jitted BPF trampoline or dispatcher code. */
DSO_BINARY_TYPE__BPF_IMAGE,
+ /**
+ * @DSO_BINARY_TYPE__OOL: out of line code such as kprobe-replaced
+ * instructions or optimized kprobes or ftrace trampolines.
+ */
DSO_BINARY_TYPE__OOL,
+ /** @DSO_BINARY_TYPE__NOT_FOUND: Unknown DSO kind. */
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -154,10 +212,12 @@ struct dso_data {
int status;
u32 status_seen;
u64 file_size;
+#ifdef HAVE_LIBUNWIND_SUPPORT
u64 elf_base_addr;
u64 debug_frame_offset;
u64 eh_frame_hdr_addr;
u64 eh_frame_hdr_offset;
+#endif
};
struct dso_bpf_prog {
@@ -231,6 +291,8 @@ DECLARE_RC_STRUCT(dso) {
char name[];
};
+extern struct mutex _dso__data_open_lock;
+
/* dso__for_each_symbol - iterate over the symbols of given type
*
* @dso: the 'struct dso *' in which symbols are iterated
@@ -652,7 +714,7 @@ void __dso__inject_id(struct dso *dso, const struct dso_id *id);
int dso__name_len(const struct dso *dso);
struct dso *dso__get(struct dso *dso);
-void dso__put(struct dso *dso);
+void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
static inline void __dso__zput(struct dso **dso)
{
@@ -675,6 +737,8 @@ bool dso__sorted_by_name(const struct dso *dso);
void dso__set_sorted_by_name(struct dso *dso);
void dso__sort_by_name(struct dso *dso);
+int dso__swap_init(struct dso *dso, unsigned char eidata);
+
void dso__set_build_id(struct dso *dso, struct build_id *bid);
bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
void dso__read_running_kernel_build_id(struct dso *dso,
@@ -732,8 +796,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
* The current usage of the dso__data_* interface is as follows:
*
* Get DSO's fd:
- * int fd = dso__data_get_fd(dso, machine);
- * if (fd >= 0) {
+ * int fd;
+ * if (dso__data_get_fd(dso, machine, &fd)) {
* USE 'fd' SOMEHOW
* dso__data_put_fd(dso);
* }
@@ -755,14 +819,16 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
*
* TODO
*/
-int dso__data_get_fd(struct dso *dso, struct machine *machine);
-void dso__data_put_fd(struct dso *dso);
-void dso__data_close(struct dso *dso);
+bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
+ EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock);
+void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock);
+void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
int dso__data_file_size(struct dso *dso, struct machine *machine);
off_t dso__data_size(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size);
+uint16_t dso__e_machine(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
struct machine *machine, u64 addr,
u8 *data, ssize_t size);
@@ -808,7 +874,9 @@ static inline bool dso__is_kcore(const struct dso *dso)
static inline bool dso__is_kallsyms(const struct dso *dso)
{
- return RC_CHK_ACCESS(dso)->kernel && RC_CHK_ACCESS(dso)->long_name[0] != '/';
+ enum dso_binary_type bt = dso__binary_type(dso);
+
+ return bt == DSO_BINARY_TYPE__KALLSYMS || bt == DSO_BINARY_TYPE__GUEST_KALLSYMS;
}
bool dso__is_object_file(const struct dso *dso);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index cae4f6d63318..36411749e007 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+ nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
}
nr++;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index aac96d5d1917..c23b77f8f854 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -767,6 +767,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->socket = env->cpu[al->cpu].socket_id;
}
+ /* Account for possible out-of-order switch events. */
+ al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
+ if (test_bit(al->parallelism, symbol_conf.parallelism_filter))
+ al->filtered |= (1 << HIST_FILTER__PARALLELISM);
+ /*
+ * Multiply it by a constant to avoid precision loss or dealing
+ * with floats. The multiplier does not matter otherwise since
+ * we only print it as a percentage.
+ */
+ al->latency = sample->period * 1000 / al->parallelism;
+
if (al->map) {
if (symbol_conf.dso_list &&
(!dso || !(strlist__has_entry(symbol_conf.dso_list,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2744c54f404e..664bf39567ce 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -67,9 +67,15 @@ enum {
PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13,
PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14,
PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15,
+ PERF_IP_FLAG_NOT_TAKEN = 1ULL << 16,
};
-#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn"
+
+#define PERF_ADDITIONAL_STATE_MASK \
+ (PERF_IP_FLAG_IN_TX | \
+ PERF_IP_FLAG_INTR_DISABLE | \
+ PERF_IP_FLAG_INTR_TOGGLE)
#define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\
@@ -85,6 +91,10 @@ enum {
PERF_IP_FLAG_VMENTRY |\
PERF_IP_FLAG_VMEXIT)
+#define PERF_IP_FLAG_BRANCH_EVENT_MASK \
+ (PERF_IP_FLAG_BRANCH_MISS | \
+ PERF_IP_FLAG_NOT_TAKEN)
+
#define PERF_MEM_DATA_SRC_NONE \
(PERF_MEM_S(OP, NA) |\
PERF_MEM_S(LVL, NA) |\
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index eabd7913c309..dcff697ed252 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -57,6 +57,8 @@ struct events_stats {
struct hists_stats {
u64 total_period;
u64 total_non_filtered_period;
+ u64 total_latency;
+ u64 total_non_filtered_latency;
u32 nr_samples;
u32 nr_non_filtered_samples;
u32 nr_lost_samples;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f0dd174e2deb..c1a04141aed0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1373,19 +1373,18 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
*/
cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
- goto out;
+ return -ENOMEM;
threads = perf_thread_map__new_dummy();
- if (!threads)
- goto out_put;
+ if (!threads) {
+ perf_cpu_map__put(cpus);
+ return -ENOMEM;
+ }
perf_evlist__set_maps(&evlist->core, cpus, threads);
-
perf_thread_map__put(threads);
-out_put:
perf_cpu_map__put(cpus);
-out:
- return -ENOMEM;
+ return 0;
}
int evlist__open(struct evlist *evlist)
@@ -2535,10 +2534,10 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
return;
evlist__for_each_entry(evlist, pos) {
- struct perf_cpu_map *intersect, *to_test;
+ struct perf_cpu_map *intersect, *to_test, *online = cpu_map__online();
const struct perf_pmu *pmu = evsel__find_pmu(pos);
- to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online();
+ to_test = pmu && pmu->is_core ? pmu->cpus : online;
intersect = perf_cpu_map__intersect(to_test, user_requested_cpus);
if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
char buf[128];
@@ -2548,6 +2547,7 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos));
}
perf_cpu_map__put(intersect);
+ perf_cpu_map__put(online);
}
perf_cpu_map__put(user_requested_cpus);
}
@@ -2594,3 +2594,17 @@ bool evlist__has_bpf_output(struct evlist *evlist)
return false;
}
+
+bool evlist__needs_bpf_sb_event(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_dummy_event(evsel))
+ continue;
+ if (!evsel->core.attr.exclude_kernel)
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index adddb1db1ad2..edcbf1c10e92 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -435,5 +435,6 @@ void evlist__check_mem_load_aux(struct evlist *evlist);
void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
void evlist__uniquify_name(struct evlist *evlist);
bool evlist__has_bpf_output(struct evlist *evlist);
+bool evlist__needs_bpf_sb_event(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bc144388f892..1974395492d7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -237,6 +237,16 @@ set_methods:
return 0;
}
+const char *evsel__pmu_name(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ if (pmu)
+ return pmu->name;
+
+ return event_type(evsel->core.attr.type);
+}
+
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
int __evsel__sample_size(u64 sample_type)
@@ -511,6 +521,16 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
}
evsel->cgrp = cgroup__get(orig->cgrp);
#ifdef HAVE_LIBTRACEEVENT
+ if (orig->tp_sys) {
+ evsel->tp_sys = strdup(orig->tp_sys);
+ if (evsel->tp_sys == NULL)
+ goto out_err;
+ }
+ if (orig->tp_name) {
+ evsel->tp_name = strdup(orig->tp_name);
+ if (evsel->tp_name == NULL)
+ goto out_err;
+ }
evsel->tp_format = orig->tp_format;
#endif
evsel->handler = orig->handler;
@@ -634,7 +654,11 @@ struct tep_event *evsel__tp_format(struct evsel *evsel)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
return NULL;
- tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
+ if (!evsel->tp_sys)
+ tp_format = trace_event__tp_format_id(evsel->core.attr.config);
+ else
+ tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
+
if (IS_ERR(tp_format)) {
int err = -PTR_ERR(evsel->tp_format);
@@ -3164,17 +3188,19 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
if (type & PERF_SAMPLE_REGS_USER) {
+ struct regs_dump *regs = perf_sample__user_regs(data);
+
OVERFLOW_CHECK_u64(array);
- data->user_regs.abi = *array;
+ regs->abi = *array;
array++;
- if (data->user_regs.abi) {
+ if (regs->abi) {
u64 mask = evsel->core.attr.sample_regs_user;
sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
- data->user_regs.mask = mask;
- data->user_regs.regs = (u64 *)array;
+ regs->mask = mask;
+ regs->regs = (u64 *)array;
array = (void *)array + sz;
}
}
@@ -3218,19 +3244,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array++;
}
- data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
if (type & PERF_SAMPLE_REGS_INTR) {
+ struct regs_dump *regs = perf_sample__intr_regs(data);
+
OVERFLOW_CHECK_u64(array);
- data->intr_regs.abi = *array;
+ regs->abi = *array;
array++;
- if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) {
u64 mask = evsel->core.attr.sample_regs_intr;
sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
- data->intr_regs.mask = mask;
- data->intr_regs.regs = (u64 *)array;
+ regs->mask = mask;
+ regs->regs = (u64 *)array;
array = (void *)array + sz;
}
}
@@ -3856,10 +3883,10 @@ void evsel__zero_per_pkg(struct evsel *evsel)
*/
bool evsel__is_hybrid(const struct evsel *evsel)
{
- if (perf_pmus__num_core_pmus() == 1)
+ if (!evsel->core.is_pmu_core)
return false;
- return evsel->core.is_pmu_core;
+ return perf_pmus__num_core_pmus() > 1;
}
struct evsel *evsel__leader(const struct evsel *evsel)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5e789fa80590..aae431d63d64 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -119,6 +119,7 @@ struct evsel {
bool errored;
bool needs_auxtrace_mmap;
bool default_metricgroup; /* A member of the Default metricgroup */
+ bool needs_uniquify;
struct hashmap *per_pkg_mask;
int err;
int script_output_type;
@@ -236,6 +237,7 @@ int evsel__object_config(size_t object_size,
void (*fini)(struct evsel *evsel));
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel);
+const char *evsel__pmu_name(const struct evsel *evsel);
bool evsel__is_aux_event(const struct evsel *evsel);
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index c221dcce6666..6413537442aa 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -215,6 +215,8 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
struct expr_id_data **data)
{
+ if (!ctx || !id)
+ return -1;
return hashmap__find(ctx->ids, id, data) ? 0 : -1;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 5dee2caba0fe..a9bc47da83a5 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -24,6 +24,8 @@ struct perf_ftrace {
unsigned int bucket_range;
unsigned int min_latency;
unsigned int max_latency;
+ unsigned int bucket_num;
+ bool hide_empty;
int graph_depth;
int func_stack_trace;
int func_irq_info;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d06aa86352d3..e3cdc3b7b4ab 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -44,6 +44,7 @@
#include "build-id.h"
#include "data.h"
#include <api/fs/fs.h>
+#include <api/io_dir.h>
#include "asm/bug.h"
#include "tool.h"
#include "time-utils.h"
@@ -1311,11 +1312,11 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
{
unsigned int phys, size = 0;
char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
#define for_each_memory(mem, dir) \
- while ((ent = readdir(dir))) \
+ while ((ent = io_dir__readdir(&dir)) != NULL) \
if (strcmp(ent->d_name, ".") && \
strcmp(ent->d_name, "..") && \
sscanf(ent->d_name, "memory%u", &mem) == 1)
@@ -1324,9 +1325,9 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
"%s/devices/system/node/node%lu",
sysfs__mountpoint(), idx);
- dir = opendir(path);
- if (!dir) {
- pr_warning("failed: can't open memory sysfs data\n");
+ io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (dir.dirfd < 0) {
+ pr_warning("failed: can't open memory sysfs data '%s'\n", path);
return -1;
}
@@ -1338,20 +1339,20 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
n->set = bitmap_zalloc(size);
if (!n->set) {
- closedir(dir);
+ close(dir.dirfd);
return -ENOMEM;
}
n->node = idx;
n->size = size;
- rewinddir(dir);
+ io_dir__rewinddir(&dir);
for_each_memory(phys, dir) {
__set_bit(phys, n->set);
}
- closedir(dir);
+ close(dir.dirfd);
return 0;
}
@@ -1374,8 +1375,8 @@ static int memory_node__sort(const void *a, const void *b)
static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
{
char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
int ret = 0;
size_t cnt = 0, size = 0;
struct memory_node *nodes = NULL;
@@ -1383,14 +1384,14 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
scnprintf(path, PATH_MAX, "%s/devices/system/node/",
sysfs__mountpoint());
- dir = opendir(path);
- if (!dir) {
+ io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (dir.dirfd < 0) {
pr_debug2("%s: couldn't read %s, does this arch have topology information?\n",
__func__, path);
return -1;
}
- while (!ret && (ent = readdir(dir))) {
+ while (!ret && (ent = io_dir__readdir(&dir))) {
unsigned int idx;
int r;
@@ -1419,7 +1420,7 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
cnt += 1;
}
out:
- closedir(dir);
+ close(dir.dirfd);
if (!ret) {
*cntp = cnt;
*nodesp = nodes;
@@ -2769,6 +2770,8 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
free(name);
pmu_num--;
}
+ /* AMD may have already set it via evlist__has_amd_ibs() from perf_session__new() */
+ free(ff->ph->env.pmu_mappings);
ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
return 0;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0f30f843c566..d65228c11412 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -43,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
struct hist_entry *he);
static bool hists__filter_entry_by_socket(struct hists *hists,
struct hist_entry *he);
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+ struct hist_entry *he);
u16 hists__col_len(struct hists *hists, enum hist_column col)
{
@@ -207,6 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_CGROUP, 6);
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+ hists__new_col_len(hists, HISTC_PARALLELISM, 11);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -302,9 +305,10 @@ static long hist_time(unsigned long htime)
return htime;
}
-static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency)
{
he_stat->period += period;
+ he_stat->latency += latency;
he_stat->nr_events += 1;
}
@@ -319,6 +323,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->weight2 += src->weight2;
dest->weight3 += src->weight3;
dest->nr_events += src->nr_events;
+ dest->latency += src->latency;
}
static void he_stat__decay(struct he_stat *he_stat)
@@ -328,6 +333,7 @@ static void he_stat__decay(struct he_stat *he_stat)
he_stat->weight1 = (he_stat->weight1 * 7) / 8;
he_stat->weight2 = (he_stat->weight2 * 7) / 8;
he_stat->weight3 = (he_stat->weight3 * 7) / 8;
+ he_stat->latency = (he_stat->latency * 7) / 8;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
@@ -335,7 +341,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
{
u64 prev_period = he->stat.period;
- u64 diff;
+ u64 prev_latency = he->stat.latency;
if (prev_period == 0)
return true;
@@ -345,12 +351,16 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
he_stat__decay(he->stat_acc);
decay_callchain(he->callchain);
- diff = prev_period - he->stat.period;
-
if (!he->depth) {
- hists->stats.total_period -= diff;
- if (!he->filtered)
- hists->stats.total_non_filtered_period -= diff;
+ u64 period_diff = prev_period - he->stat.period;
+ u64 latency_diff = prev_latency - he->stat.latency;
+
+ hists->stats.total_period -= period_diff;
+ hists->stats.total_latency -= latency_diff;
+ if (!he->filtered) {
+ hists->stats.total_non_filtered_period -= period_diff;
+ hists->stats.total_non_filtered_latency -= latency_diff;
+ }
}
if (!he->leaf) {
@@ -365,7 +375,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
}
}
- return he->stat.period == 0;
+ return he->stat.period == 0 && he->stat.latency == 0;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
@@ -584,21 +594,24 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
return he;
}
-static u8 symbol__parent_filter(const struct symbol *parent)
+static filter_mask_t symbol__parent_filter(const struct symbol *parent)
{
if (symbol_conf.exclude_other && parent == NULL)
return 1 << HIST_FILTER__PARENT;
return 0;
}
-static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency)
{
if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
return;
he->hists->callchain_period += period;
- if (!he->filtered)
+ he->hists->callchain_latency += latency;
+ if (!he->filtered) {
he->hists->callchain_non_filtered_period += period;
+ he->hists->callchain_non_filtered_latency += latency;
+ }
}
static struct hist_entry *hists__findnew_entry(struct hists *hists,
@@ -611,6 +624,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
struct hist_entry *he;
int64_t cmp;
u64 period = entry->stat.period;
+ u64 latency = entry->stat.latency;
bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node;
@@ -629,10 +643,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (!cmp) {
if (sample_self) {
he_stat__add_stat(&he->stat, &entry->stat);
- hist_entry__add_callchain_period(he, period);
+ hist_entry__add_callchain_period(he, period, latency);
}
if (symbol_conf.cumulate_callchain)
- he_stat__add_period(he->stat_acc, period);
+ he_stat__add_period(he->stat_acc, period, latency);
block_info__delete(entry->block_info);
@@ -669,7 +683,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
return NULL;
if (sample_self)
- hist_entry__add_callchain_period(he, period);
+ hist_entry__add_callchain_period(he, period, latency);
hists->nr_entries++;
rb_link_node(&he->rb_node_in, parent, p);
@@ -741,12 +755,14 @@ __hists__add_entry(struct hists *hists,
.ip = al->addr,
.level = al->level,
.code_page_size = sample->code_page_size,
+ .parallelism = al->parallelism,
.stat = {
.nr_events = 1,
.period = sample->period,
.weight1 = sample->weight,
.weight2 = sample->ins_lat,
.weight3 = sample->p_stage_cyc,
+ .latency = al->latency,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
@@ -975,8 +991,6 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
if (he == NULL)
return -ENOMEM;
- hists__inc_nr_samples(hists, he->filtered);
-
out:
iter->he = he;
iter->curr++;
@@ -995,9 +1009,15 @@ static int
iter_finish_branch_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
+ struct evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+
for (int i = 0; i < iter->total; i++)
branch_info__exit(&iter->bi[i]);
+ if (iter->he)
+ hists__inc_nr_samples(hists, iter->he->filtered);
+
zfree(&iter->bi);
iter->he = NULL;
@@ -1365,6 +1385,16 @@ void hist_entry__delete(struct hist_entry *he)
{
struct hist_entry_ops *ops = he->ops;
+ if (symbol_conf.report_hierarchy) {
+ struct rb_root *root = &he->hroot_out.rb_root;
+ struct hist_entry *child, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node)
+ hist_entry__delete(child);
+
+ *root = RB_ROOT;
+ }
+
thread__zput(he->thread);
map_symbol__exit(&he->ms);
@@ -1455,6 +1485,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
if (symbol_conf.sym_list == NULL)
return;
break;
+ case HIST_FILTER__PARALLELISM:
+ if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0)
+ return;
+ break;
case HIST_FILTER__PARENT:
case HIST_FILTER__GUEST:
case HIST_FILTER__HOST:
@@ -1513,6 +1547,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
perf_hpp__is_sym_entry);
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM,
+ perf_hpp__is_parallelism_entry);
+
hists__apply_filters(he->hists, he);
}
@@ -1709,6 +1746,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
hists__filter_entry_by_thread(hists, he);
hists__filter_entry_by_symbol(hists, he);
hists__filter_entry_by_socket(hists, he);
+ hists__filter_entry_by_parallelism(hists, he);
}
int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
@@ -1756,12 +1794,14 @@ static void hists__reset_filter_stats(struct hists *hists)
{
hists->nr_non_filtered_entries = 0;
hists->stats.total_non_filtered_period = 0;
+ hists->stats.total_non_filtered_latency = 0;
}
void hists__reset_stats(struct hists *hists)
{
hists->nr_entries = 0;
hists->stats.total_period = 0;
+ hists->stats.total_latency = 0;
hists__reset_filter_stats(hists);
}
@@ -1770,6 +1810,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
{
hists->nr_non_filtered_entries++;
hists->stats.total_non_filtered_period += h->stat.period;
+ hists->stats.total_non_filtered_latency += h->stat.latency;
}
void hists__inc_stats(struct hists *hists, struct hist_entry *h)
@@ -1779,6 +1820,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
hists->nr_entries++;
hists->stats.total_period += h->stat.period;
+ hists->stats.total_latency += h->stat.latency;
}
static void hierarchy_recalc_total_periods(struct hists *hists)
@@ -1790,6 +1832,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
hists->stats.total_period = 0;
hists->stats.total_non_filtered_period = 0;
+ hists->stats.total_latency = 0;
+ hists->stats.total_non_filtered_latency = 0;
/*
* recalculate total period using top-level entries only
@@ -1801,8 +1845,11 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
node = rb_next(node);
hists->stats.total_period += he->stat.period;
- if (!he->filtered)
+ hists->stats.total_latency += he->stat.latency;
+ if (!he->filtered) {
hists->stats.total_non_filtered_period += he->stat.period;
+ hists->stats.total_non_filtered_latency += he->stat.latency;
+ }
}
}
@@ -2195,6 +2242,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists,
return false;
}
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (test_bit(he->parallelism, hists->parallelism_filter)) {
+ he->filtered |= (1 << HIST_FILTER__PARALLELISM);
+ return true;
+ }
+ return false;
+}
+
typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
@@ -2364,6 +2421,16 @@ void hists__filter_by_socket(struct hists *hists)
hists__filter_entry_by_socket);
}
+void hists__filter_by_parallelism(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM,
+ hists->parallelism_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__PARALLELISM,
+ hists__filter_entry_by_parallelism);
+}
+
void events_stats__inc(struct events_stats *stats, u32 type)
{
++stats->nr_events[0];
@@ -2759,6 +2826,12 @@ u64 hists__total_period(struct hists *hists)
hists->stats.total_period;
}
+u64 hists__total_latency(struct hists *hists)
+{
+ return symbol_conf.filter_relative ? hists->stats.total_non_filtered_latency :
+ hists->stats.total_latency;
+}
+
int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
{
char unit;
@@ -2870,6 +2943,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
hists->entries = RB_ROOT_CACHED;
mutex_init(&hists->lock);
hists->socket_filter = -1;
+ hists->parallelism_filter = symbol_conf.parallelism_filter;
hists->hpp_list = hpp_list;
INIT_LIST_HEAD(&hists->hpp_formats);
return 0;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 46c8373e3146..317d06cca8b8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -31,8 +31,11 @@ enum hist_filter {
HIST_FILTER__HOST,
HIST_FILTER__SOCKET,
HIST_FILTER__C2C,
+ HIST_FILTER__PARALLELISM,
};
+typedef u16 filter_mask_t;
+
enum hist_column {
HISTC_SYMBOL,
HISTC_TIME,
@@ -42,6 +45,7 @@ enum hist_column {
HISTC_CGROUP_ID,
HISTC_CGROUP,
HISTC_PARENT,
+ HISTC_PARALLELISM,
HISTC_CPU,
HISTC_SOCKET,
HISTC_SRCLINE,
@@ -105,10 +109,13 @@ struct hists {
u64 nr_non_filtered_entries;
u64 callchain_period;
u64 callchain_non_filtered_period;
+ u64 callchain_latency;
+ u64 callchain_non_filtered_latency;
struct thread *thread_filter;
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
+ unsigned long *parallelism_filter;
struct mutex lock;
struct hists_stats stats;
u64 event_stream;
@@ -165,6 +172,12 @@ struct res_sample {
struct he_stat {
u64 period;
+ /*
+ * Period re-scaled from CPU time to wall-clock time (divided by the
+ * parallelism at the time of the sample). This represents the effect
+ * of the event on latency rather than CPU consumption.
+ */
+ u64 latency;
u64 period_sys;
u64 period_us;
u64 period_guest_sys;
@@ -226,15 +239,16 @@ struct hist_entry {
u64 cgroup;
u64 ip;
u64 transaction;
- s32 socket;
- s32 cpu;
u64 code_page_size;
u64 weight;
u64 ins_lat;
u64 p_stage_cyc;
+ s32 socket;
+ s32 cpu;
+ int parallelism;
+ int mem_type_off;
u8 cpumode;
u8 depth;
- int mem_type_off;
struct simd_flags simd_flags;
/* We are added by hists__add_dummy_entry. */
@@ -242,7 +256,7 @@ struct hist_entry {
bool leaf;
char level;
- u8 filtered;
+ filter_mask_t filtered;
u16 callchain_size;
union {
@@ -368,6 +382,7 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows);
struct hist_entry *hists__get_entry(struct hists *hists, int idx);
u64 hists__total_period(struct hists *hists);
+u64 hists__total_latency(struct hists *hists);
void hists__reset_stats(struct hists *hists);
void hists__inc_stats(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists);
@@ -384,11 +399,13 @@ void hists__filter_by_dso(struct hists *hists);
void hists__filter_by_thread(struct hists *hists);
void hists__filter_by_symbol(struct hists *hists);
void hists__filter_by_socket(struct hists *hists);
+void hists__filter_by_parallelism(struct hists *hists);
static inline bool hists__has_filter(struct hists *hists)
{
return hists->thread_filter || hists->dso_filter ||
- hists->symbol_filter_str || (hists->socket_filter > -1);
+ hists->symbol_filter_str || (hists->socket_filter > -1) ||
+ hists->parallelism_filter;
}
u16 hists__col_len(struct hists *hists, enum hist_column col);
@@ -547,11 +564,13 @@ extern struct perf_hpp_fmt perf_hpp__format[];
enum {
/* Matches perf_hpp__format array. */
PERF_HPP__OVERHEAD,
+ PERF_HPP__LATENCY,
PERF_HPP__OVERHEAD_SYS,
PERF_HPP__OVERHEAD_US,
PERF_HPP__OVERHEAD_GUEST_SYS,
PERF_HPP__OVERHEAD_GUEST_US,
PERF_HPP__OVERHEAD_ACC,
+ PERF_HPP__LATENCY_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
PERF_HPP__WEIGHT1,
@@ -563,6 +582,7 @@ enum {
void perf_hpp__init(void);
void perf_hpp__cancel_cumulate(void);
+void perf_hpp__cancel_latency(void);
void perf_hpp__setup_output_field(struct perf_hpp_list *list);
void perf_hpp__reset_output_field(struct perf_hpp_list *list);
void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
@@ -580,6 +600,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
@@ -606,6 +627,7 @@ void hists__reset_column_width(struct hists *hists);
enum perf_hpp_fmt_type {
PERF_HPP_FMT_TYPE__RAW,
PERF_HPP_FMT_TYPE__PERCENT,
+ PERF_HPP_FMT_TYPE__LATENCY,
PERF_HPP_FMT_TYPE__AVERAGE,
};
diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
index 4acb9bb19b84..3cce77fc8004 100644
--- a/tools/perf/util/hwmon_pmu.c
+++ b/tools/perf/util/hwmon_pmu.c
@@ -11,13 +11,13 @@
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
-#include <dirent.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
@@ -108,20 +108,6 @@ struct hwmon_pmu {
};
/**
- * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key
- * represents an event.
- *
- * Related hwmon files start <type><number> that this key represents.
- */
-union hwmon_pmu_event_key {
- long type_and_num;
- struct {
- int num :16;
- enum hwmon_type type :8;
- };
-};
-
-/**
* struct hwmon_pmu_event_value: Value in hwmon_pmu->events.
*
* Hwmon files are of the form <type><number>_<item> and may have a suffix
@@ -249,31 +235,22 @@ static void fix_name(char *p)
static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
{
- DIR *dir;
- struct dirent *ent;
- int dup_fd, err = 0;
+ int err = 0;
struct hashmap_entry *cur, *tmp;
size_t bkt;
+ struct io_dirent64 *ent;
+ struct io_dir dir;
if (pmu->pmu.sysfs_aliases_loaded)
return 0;
- /*
- * Use a dup-ed fd as closedir will close it. Use openat so that the
- * directory contents are refreshed.
- */
- dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY);
-
- if (dup_fd == -1)
- return -ENOMEM;
+ /* Use openat so that the directory contents are refreshed. */
+ io_dir__init(&dir, openat(pmu->hwmon_dir_fd, ".", O_CLOEXEC | O_DIRECTORY | O_RDONLY));
- dir = fdopendir(dup_fd);
- if (!dir) {
- close(dup_fd);
- return -ENOMEM;
- }
+ if (dir.dirfd < 0)
+ return -ENOENT;
- while ((ent = readdir(dir)) != NULL) {
+ while ((ent = io_dir__readdir(&dir)) != NULL) {
enum hwmon_type type;
int number;
enum hwmon_item item;
@@ -361,7 +338,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
pmu->pmu.sysfs_aliases_loaded = true;
err_out:
- closedir(dir);
+ close(dir.dirfd);
return err;
}
@@ -716,8 +693,8 @@ int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_inf
int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
{
char *line = NULL;
- DIR *class_hwmon_dir;
- struct dirent *class_hwmon_ent;
+ struct io_dirent64 *class_hwmon_ent;
+ struct io_dir class_hwmon_dir;
char buf[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
@@ -725,11 +702,12 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
return 0;
scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs);
- class_hwmon_dir = opendir(buf);
- if (!class_hwmon_dir)
+ io_dir__init(&class_hwmon_dir, open(buf, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+
+ if (class_hwmon_dir.dirfd < 0)
return 0;
- while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) {
+ while ((class_hwmon_ent = io_dir__readdir(&class_hwmon_dir)) != NULL) {
size_t line_len;
int hwmon_dir, name_fd;
struct io io;
@@ -759,7 +737,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
close(name_fd);
}
free(line);
- closedir(class_hwmon_dir);
+ close(class_hwmon_dir.dirfd);
return 0;
}
diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h
index 882566846df4..b3329774d2b2 100644
--- a/tools/perf/util/hwmon_pmu.h
+++ b/tools/perf/util/hwmon_pmu.h
@@ -91,6 +91,22 @@ enum hwmon_item {
HWMON_ITEM__MAX,
};
+/**
+ * union hwmon_pmu_event_key: Key for hwmon_pmu->events; each key represents
+ * one event.
+ * The union is exposed for testing to ensure problems are avoided on big
+ * endian machines.
+ *
+ * Related hwmon files start with the <type><number> that this key represents.
+ *
+ * @type_and_num: The whole key viewed as a single long, overlaying the
+ *                bitfields below.
+ * @num: 16-bit bitfield; presumably the <number> part of the file name.
+ * @type: 8-bit bitfield; presumably the <type> part of the file name.
+ */
+union hwmon_pmu_event_key {
+ long type_and_num;
+ struct {
+ int num :16;
+ enum hwmon_type type :8;
+ };
+};
+
bool perf_pmu__is_hwmon(const struct perf_pmu *pmu);
bool evsel__is_hwmon(const struct evsel *evsel);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index a7c589fecb98..3625c6224750 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -275,12 +275,13 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
int ret;
struct intel_bts *bts = btsq->bts;
union perf_event event;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
if (bts->synth_opts.initial_skip &&
bts->num_events++ <= bts->synth_opts.initial_skip)
return 0;
+ perf_sample__init(&sample, /*all=*/true);
sample.ip = le64_to_cpu(branch->from);
sample.cpumode = intel_bts_cpumode(bts, sample.ip);
sample.pid = btsq->pid;
@@ -312,6 +313,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
pr_err("Intel BTS: failed to deliver branch event, error %d\n",
ret);
+ perf_sample__exit(&sample);
return ret;
}
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 30be6dfe09eb..4e8a9b172fbc 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1764,12 +1764,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct dummy_branch_stack {
u64 nr;
u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
+ int ret;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
@@ -1777,6 +1778,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_b_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->branches_id;
@@ -1806,8 +1808,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
- return intel_pt_deliver_synth_event(pt, event, &sample,
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
pt->branches_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1835,11 +1839,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->instructions_id;
@@ -1859,16 +1865,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->instructions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->instructions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
u64 period = 0;
+ int ret;
if (ptq->sample_ipc)
period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt;
@@ -1876,6 +1885,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
if (!period || intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cycles_id;
@@ -1887,25 +1897,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt;
ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt;
- return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->transactions_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->transactions_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static void intel_pt_prep_p_sample(struct intel_pt *pt,
@@ -1953,15 +1969,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_cbr raw;
u32 flags;
+ int ret;
if (intel_pt_skip_cbr_event(pt))
return 0;
ptq->cbr_seen = ptq->state->cbr;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->cbr_id;
@@ -1975,20 +1993,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_psb raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->psb_id;
@@ -2001,20 +2023,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_mwait raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->mwait_id;
@@ -2026,20 +2052,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwre raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwre_id;
@@ -2051,20 +2081,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_exstop raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->exstop_id;
@@ -2076,20 +2110,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_pwrx raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->pwrx_id;
@@ -2101,8 +2139,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->pwr_events_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->pwr_events_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
/*
@@ -2235,16 +2275,18 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
union perf_event *event = ptq->event_buf;
struct intel_pt *pt = ptq->pt;
u64 sample_type = evsel->core.attr.sample_type;
u8 cpumode;
- u64 regs[8 * sizeof(sample.intr_regs.mask)];
+ u64 regs[8 * sizeof(sample.intr_regs->mask)];
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_a_sample(ptq, event, &sample);
sample.id = id;
@@ -2291,15 +2333,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
items->mask[INTEL_PT_XMM_POS])) {
u64 regs_mask = evsel->core.attr.sample_regs_intr;
u64 *pos;
+ struct regs_dump *intr_regs = perf_sample__intr_regs(&sample);
- sample.intr_regs.abi = items->is_32_bit ?
+ intr_regs->abi = items->is_32_bit ?
PERF_SAMPLE_REGS_ABI_32 :
PERF_SAMPLE_REGS_ABI_64;
- sample.intr_regs.regs = regs;
+ intr_regs->regs = regs;
- pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+ pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask);
- intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+ intel_pt_add_xmm(intr_regs, pos, items, regs_mask);
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -2361,7 +2404,9 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
@@ -2407,16 +2452,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct {
struct perf_synth_intel_evt cfe;
struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
} raw;
- int i;
+ int i, ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->evt_id;
@@ -2438,20 +2484,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->evt_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->evt_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
- struct perf_sample sample = { .ip = 0, };
+ struct perf_sample sample;
struct perf_synth_intel_iflag_chg raw;
+ int ret;
if (intel_pt_skip_event(pt))
return 0;
+ perf_sample__init(&sample, /*all=*/true);
intel_pt_prep_p_sample(pt, ptq, event, &sample);
sample.id = ptq->pt->iflag_chg_id;
@@ -2471,8 +2521,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, event, &sample,
- pt->iflag_chg_sample_type);
+ ret = intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->iflag_chg_sample_type);
+ perf_sample__exit(&sample);
+ return ret;
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index 50a3c3e07160..2c421b475b3b 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -254,7 +254,7 @@ int tpebs_start(struct evlist *evsel_list)
new = zalloc(sizeof(*new));
if (!new) {
ret = -1;
- zfree(name);
+ zfree(&name);
goto err;
}
new->name = name;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index f23e21502bf8..624964f01b5f 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -516,7 +516,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
* create pseudo sample to induce dso hit increment
* use first address as sample address
*/
- memset(&sample, 0, sizeof(sample));
+ perf_sample__init(&sample, /*all=*/true);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
@@ -535,6 +535,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
out:
+ perf_sample__exit(&sample);
free(event);
return ret;
}
@@ -611,7 +612,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
* create pseudo sample to induce dso hit increment
* use first address as sample address
*/
- memset(&sample, 0, sizeof(sample));
+ perf_sample__init(&sample, /*all=*/true);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
@@ -620,12 +621,13 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
if (ret)
- return ret;
+ goto out;
ret = jit_inject_event(jd, event);
if (!ret)
build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
-
+out:
+ perf_sample__exit(&sample);
return ret;
}
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index a09f7fe877df..b5d916aa49df 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -168,6 +168,8 @@ int lock_contention_stop(void);
int lock_contention_read(struct lock_contention *con);
int lock_contention_finish(struct lock_contention *con);
+struct lock_stat *pop_owner_stack_trace(struct lock_contention *con);
+
#else /* !HAVE_BPF_SKEL */
static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
@@ -187,6 +189,11 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse
return 0;
}
+/* Stub for the !HAVE_BPF_SKEL build: ignores @con and always returns NULL. */
+static inline struct lock_stat *pop_owner_stack_trace(struct lock_contention *con __maybe_unused)
+{
+ return NULL;
+}
+
#endif /* HAVE_BPF_SKEL */
#endif /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index af9a97612f9d..bbcd2ffcf4bd 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -32,7 +32,7 @@ static const char *lzma_strerror(lzma_ret ret)
}
}
-int lzma_decompress_to_file(const char *input, int output_fd)
+int lzma_decompress_stream_to_file(FILE *infile, int output_fd)
{
lzma_action action = LZMA_RUN;
lzma_stream strm = LZMA_STREAM_INIT;
@@ -41,18 +41,11 @@ int lzma_decompress_to_file(const char *input, int output_fd)
u8 buf_in[BUFSIZE];
u8 buf_out[BUFSIZE];
- FILE *infile;
-
- infile = fopen(input, "rb");
- if (!infile) {
- pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
- return -1;
- }
ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
if (ret != LZMA_OK) {
pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret);
- goto err_fclose;
+ return err;
}
strm.next_in = NULL;
@@ -100,11 +93,25 @@ int lzma_decompress_to_file(const char *input, int output_fd)
err = 0;
err_lzma_end:
lzma_end(&strm);
-err_fclose:
- fclose(infile);
return err;
}
+/*
+ * Decompress the file named @input into @output_fd.
+ *
+ * Opens @input in binary read mode, hands the stream to
+ * lzma_decompress_stream_to_file() and closes the stream again before
+ * returning. Returns -1 if @input cannot be opened (the reason is logged via
+ * pr_debug()), otherwise whatever lzma_decompress_stream_to_file() returned.
+ */
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+ FILE *infile;
+ int ret;
+
+ infile = fopen(input, "rb");
+ if (!infile) {
+ pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
+ return -1;
+ }
+
+ ret = lzma_decompress_stream_to_file(infile, output_fd);
+ fclose(infile);
+ return ret;
+}
+
bool lzma_is_compressed(const char *input)
{
int fd = open(input, O_RDONLY);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2d51badfbf2e..2531b373f2cf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -37,6 +37,7 @@
#include <internal/lib.h> // page_size
#include "cgroup.h"
#include "arm64-frame-pointer-unwind-support.h"
+#include <api/io_dir.h>
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -94,6 +95,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
machine->vmlinux_map = NULL;
+ /* There is no initial context switch in, so we start at 1. */
+ machine->parallelism = 1;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@@ -677,8 +680,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
if (dump_trace)
perf_event__fprintf_switch(event, stdout);
+ machine->parallelism += out ? -1 : 1;
return 0;
}
@@ -712,7 +718,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
map__set_start(map, event->ksymbol.addr);
map__set_end(map, map__start(map) + event->ksymbol.len);
- err = maps__insert(machine__kernel_maps(machine), map);
+ err = maps__fixup_overlap_and_insert(machine__kernel_maps(machine), map);
if (err) {
err = -ENOMEM;
goto out;
@@ -773,6 +779,10 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
if (dump_trace)
perf_event__fprintf_ksymbol(event, stdout);
+ /* no need to process non-JIT BPF as it cannot get samples */
+ if (event->ksymbol.len == 0)
+ return 0;
+
if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
return machine__process_ksymbol_unregister(machine, event,
sample);
@@ -886,26 +896,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
return ret;
}
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
-{
- int i;
- size_t printed = 0;
- struct dso *kdso = machine__kernel_dso(machine);
-
- if (dso__has_build_id(kdso)) {
- char filename[PATH_MAX];
-
- if (dso__build_id_filename(kdso, filename, sizeof(filename), false))
- printed += fprintf(fp, "[0] %s\n", filename);
- }
-
- for (i = 0; i < vmlinux_path__nr_entries; ++i) {
- printed += fprintf(fp, "[%d] %s\n", i + dso__has_build_id(kdso),
- vmlinux_path[i]);
- }
- return printed;
-}
-
struct machine_fprintf_cb_args {
FILE *fp;
size_t printed;
@@ -1352,27 +1342,24 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
return 0;
}
-static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth)
+static int maps__set_modules_path_dir(struct maps *maps, char *path, size_t path_size, int depth)
{
- struct dirent *dent;
- DIR *dir = opendir(dir_name);
+ struct io_dirent64 *dent;
+ struct io_dir iod;
+ size_t root_len = strlen(path);
int ret = 0;
- if (!dir) {
- pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+ /* Bounds check, should never happen. Done before open() so no fd can leak. */
+ if (root_len >= path_size)
+ return -1;
+ io_dir__init(&iod, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (iod.dirfd < 0) {
+ pr_debug("%s: cannot open %s dir\n", __func__, path);
return -1;
}
-
- while ((dent = readdir(dir)) != NULL) {
- char path[PATH_MAX];
- struct stat st;
-
- /*sshfs might return bad dent->d_type, so we have to stat*/
- path__join(path, sizeof(path), dir_name, dent->d_name);
- if (stat(path, &st))
- continue;
-
- if (S_ISDIR(st.st_mode)) {
+ path[root_len++] = '/';
+ while ((dent = io_dir__readdir(&iod)) != NULL) {
+ if (io_dir__is_dir(&iod, dent)) {
if (!strcmp(dent->d_name, ".") ||
!strcmp(dent->d_name, ".."))
continue;
@@ -1384,7 +1371,12 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
continue;
}
- ret = maps__set_modules_path_dir(maps, path, depth + 1);
+ /* Bounds check, should never happen. */
+ if (root_len + strlen(dent->d_name) >= path_size)
+ continue;
+
+ strcpy(path + root_len, dent->d_name);
+ ret = maps__set_modules_path_dir(maps, path, path_size, depth + 1);
if (ret < 0)
goto out;
} else {
@@ -1394,9 +1386,14 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
if (ret)
goto out;
- if (m.kmod)
- ret = maps__set_module_path(maps, path, &m);
+ if (m.kmod) {
+ /* Bounds check, should never happen. */
+ if (root_len + strlen(dent->d_name) < path_size) {
+ strcpy(path + root_len, dent->d_name);
+ ret = maps__set_module_path(maps, path, &m);
+ }
+ }
zfree(&m.name);
if (ret)
@@ -1405,7 +1402,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
}
out:
- closedir(dir);
+ close(iod.dirfd);
return ret;
}
@@ -1422,7 +1419,8 @@ static int machine__set_modules_path(struct machine *machine)
machine->root_dir, version);
free(version);
- return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0);
+ return maps__set_modules_path_dir(machine__kernel_maps(machine),
+ modules_path, sizeof(modules_path), 0);
}
int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
u64 *size __maybe_unused,
@@ -1468,8 +1466,6 @@ static int machine__create_modules(struct machine *machine)
if (modules__parse(modules, machine, machine__create_module))
return -1;
- maps__fixup_end(machine__kernel_maps(machine));
-
if (!machine__set_modules_path(machine))
return 0;
@@ -1563,6 +1559,8 @@ int machine__create_kernel_maps(struct machine *machine)
}
}
+ maps__fixup_end(machine__kernel_maps(machine));
+
out_put:
dso__put(kernel);
return ret;
@@ -1900,6 +1898,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
+ /* There is no context switch out before exit, so we decrement here. */
+ machine->parallelism--;
if (thread != NULL) {
if (symbol_conf.keep_exited_threads)
thread__set_exited(thread, /*exited=*/true);
@@ -2929,8 +2929,8 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
return 0;
/* Bail out if nothing was captured. */
- if ((!sample->user_regs.regs) ||
- (!sample->user_stack.size))
+ if (!sample->user_regs || !sample->user_regs->regs ||
+ !sample->user_stack.size)
return 0;
if (!symbols)
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 2e5a4cb342d8..b56abec84fed 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -50,6 +50,12 @@ struct machine {
u64 text_start;
u64 text_end;
} sched, lock, traceiter, trace;
+ /*
+ * The current parallelism level (number of threads that run on CPUs).
+ * This value can be less than 1, or larger than the total number
+ * of CPUs, if events are poorly ordered.
+ */
+ int parallelism;
pid_t *current_tid;
size_t current_tid_sz;
union { /* Tool specific area */
@@ -266,8 +272,6 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid);
int machines__create_guest_kernel_maps(struct machines *machines);
void machines__destroy_kernel_maps(struct machines *machines);
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
-
typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 09c9cc326c08..0b40d901675e 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -428,11 +428,29 @@ static unsigned int maps__by_name_index(const struct maps *maps, const struct ma
return -1;
}
+/*
+ * Record @maps as the kmaps of @map's kmap when @map's dso is a kernel dso.
+ * A NULL @map is ignored. A kernel dso whose map yields no kmap is reported
+ * with pr_err() and otherwise left alone.
+ */
+static void map__set_kmap_maps(struct map *map, struct maps *maps)
+{
+ struct dso *dso;
+
+ if (map == NULL)
+ return;
+
+ dso = map__dso(map);
+
+ if (dso && dso__kernel(dso)) {
+ struct kmap *kmap = map__kmap(map);
+
+ if (kmap)
+ kmap->kmaps = maps;
+ else
+ pr_err("Internal error: kernel dso with non kernel map\n");
+ }
+}
+
static int __maps__insert(struct maps *maps, struct map *new)
{
struct map **maps_by_address = maps__maps_by_address(maps);
struct map **maps_by_name = maps__maps_by_name(maps);
- const struct dso *dso = map__dso(new);
unsigned int nr_maps = maps__nr_maps(maps);
unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated;
@@ -483,14 +501,9 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- if (dso && dso__kernel(dso)) {
- struct kmap *kmap = map__kmap(new);
- if (kmap)
- kmap->kmaps = maps;
- else
- pr_err("Internal error: kernel dso with non kernel map\n");
- }
+ map__set_kmap_maps(new, maps);
+
return 0;
}
@@ -785,6 +798,9 @@ static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_ind
}
RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add;
maps__set_maps_by_name_sorted(maps, false);
+ map__set_kmap_maps(new1, maps);
+ map__set_kmap_maps(new2, maps);
+
check_invariants(maps);
return 0;
}
@@ -797,7 +813,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
{
int err = 0;
FILE *fp = debug_file();
- unsigned int i;
+ unsigned int i, ni = INT_MAX; // Some gcc complain, but depends on maps_by_name...
if (!maps__maps_by_address_sorted(maps))
__maps__sort_by_address(maps);
@@ -808,6 +824,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) {
struct map **maps_by_address = maps__maps_by_address(maps);
+ struct map **maps_by_name = maps__maps_by_name(maps);
struct map *pos = maps_by_address[i];
struct map *before = NULL, *after = NULL;
@@ -827,6 +844,9 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
map__fprintf(pos, fp);
}
+ if (maps_by_name)
+ ni = maps__by_name_index(maps, pos);
+
/*
* Now check if we need to create new maps for areas not
* overlapped by the new map:
@@ -871,6 +891,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(before);
+ }
+
/* Maps are still ordered, go to next one. */
i++;
if (after) {
@@ -892,6 +918,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(new);
+ }
+
err = __maps__insert_sorted(maps, i + 1, after, NULL);
map__put(after);
check_invariants(maps);
@@ -910,6 +942,14 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+
+ if (maps_by_name) {
+ map__put(maps_by_name[ni]);
+ maps_by_name[ni] = map__get(new);
+ }
+
+ map__set_kmap_maps(new, maps);
+
check_invariants(maps);
return err;
}
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 3692e988c86e..884d9aebce91 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -31,9 +31,6 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 };
-static char mem_loads_name[100];
-static char mem_stores_name[100];
-
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
{
if (i >= PERF_MEM_EVENTS__MAX || !pmu)
@@ -81,7 +78,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
return num;
}
-static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
+static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i,
+ char *buf, size_t buf_size)
{
struct perf_mem_event *e;
@@ -96,31 +94,31 @@ static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
if (e->ldlat) {
if (!e->aux_event) {
/* ARM and Most of Intel */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name,
perf_mem_events__loads_ldlat);
} else {
/* Intel with mem-loads-aux event */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name, pmu->name,
perf_mem_events__loads_ldlat);
}
} else {
if (!e->aux_event) {
/* AMD and POWER */
- scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name);
- } else
+ } else {
return NULL;
+ }
}
-
- return mem_loads_name;
+ return buf;
}
if (i == PERF_MEM_EVENTS__STORE) {
- scnprintf(mem_stores_name, sizeof(mem_stores_name),
+ scnprintf(buf, buf_size,
e->name, pmu->name);
- return mem_stores_name;
+ return buf;
}
return NULL;
@@ -189,7 +187,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu
if (!e->event_name)
return true;
- scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name);
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name);
return !stat(path, &st);
}
@@ -238,69 +236,87 @@ void perf_pmu__mem_events_list(struct perf_pmu *pmu)
int j;
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ char buf[128];
struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
fprintf(stderr, "%-*s%-*s%s",
e->tag ? 13 : 0,
e->tag ? : "",
e->tag && verbose > 0 ? 25 : 0,
- e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "",
+ e->tag && verbose > 0
+ ? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))
+ : "",
e->supported ? ": available\n" : "");
}
}
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out)
{
const char *mnt = sysfs__mount();
struct perf_pmu *pmu = NULL;
- struct perf_mem_event *e;
int i = *argv_nr;
- const char *s;
- char *copy;
struct perf_cpu_map *cpu_map = NULL;
- int ret;
+ size_t event_name_storage_size =
+ perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128;
+ size_t event_name_storage_remaining = event_name_storage_size;
+ char *event_name_storage = malloc(event_name_storage_size);
+ char *event_name_storage_ptr = event_name_storage;
+
+ if (!event_name_storage)
+ return -ENOMEM;
+ *event_name_storage_out = NULL;
while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- e = perf_pmu__mem_events_ptr(pmu, j);
+ const char *s;
+ struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
+ int ret;
if (!perf_mem_record[j])
continue;
if (!e->supported) {
+ char buf[128];
+
pr_err("failed: event '%s' not supported\n",
- perf_pmu__mem_events_name(j, pmu));
+ perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)));
+ free(event_name_storage);
return -1;
}
- s = perf_pmu__mem_events_name(j, pmu);
+ s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr,
+ event_name_storage_remaining);
if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
continue;
- copy = strdup(s);
- if (!copy)
- return -1;
-
rec_argv[i++] = "-e";
- rec_argv[i++] = copy;
+ rec_argv[i++] = event_name_storage_ptr;
+ event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1;
+ event_name_storage_ptr += strlen(event_name_storage_ptr) + 1;
ret = perf_cpu_map__merge(&cpu_map, pmu->cpus);
- if (ret < 0)
+ if (ret < 0) {
+ free(event_name_storage);
return ret;
+ }
}
}
if (cpu_map) {
- if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
+ struct perf_cpu_map *online = cpu_map__online();
+
+ if (!perf_cpu_map__equal(cpu_map, online)) {
char buf[200];
cpu_map__snprint(cpu_map, buf, sizeof(buf));
pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
}
+ perf_cpu_map__put(online);
perf_cpu_map__put(cpu_map);
}
*argv_nr = i;
+ *event_name_storage_out = event_name_storage;
return 0;
}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 8dc27db9fd52..a5c19d39ee37 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -38,7 +38,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
bool is_mem_loads_aux_event(struct evsel *leader);
void perf_pmu__mem_events_list(struct perf_pmu *pmu);
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
+ char **event_name_storage_out);
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 43b02293f1d2..a34726219af3 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -244,9 +244,8 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
{
int idx, nr_cpus;
struct perf_cpu cpu;
- const struct perf_cpu_map *cpu_map = NULL;
+ struct perf_cpu_map *cpu_map = cpu_map__online();
- cpu_map = cpu_map__online();
if (!cpu_map)
return;
@@ -256,6 +255,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
if (cpu__get_node(cpu) == node)
__set_bit(cpu.cpu, mask->bits);
}
+ perf_cpu_map__put(cpu_map);
}
static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
@@ -356,14 +356,3 @@ int perf_mmap__push(struct mmap *md, void *to,
out:
return rc;
}
-
-int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone)
-{
- clone->nbits = original->nbits;
- clone->bits = bitmap_zalloc(original->nbits);
- if (!clone->bits)
- return -ENOMEM;
-
- memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original));
- return 0;
-}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 0df6e1621c7e..4d72c5fa5084 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -61,7 +61,4 @@ size_t mmap__mmap_len(struct mmap *map);
void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
-int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original,
- struct mmap_cpu_mask *clone);
-
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
index 40661120cacc..62d258c71ded 100644
--- a/tools/perf/util/mutex.h
+++ b/tools/perf/util/mutex.h
@@ -33,6 +33,12 @@
/* Documents if a type is a lockable type. */
#define LOCKABLE __attribute__((lockable))
+/* Documents a function that expects a lock not to be held prior to entry. */
+#define LOCKS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__)))
+
+/* Documents a function that returns a lock. */
+#define LOCK_RETURNED(x) __attribute__((lock_returned(x)))
+
/* Documents functions that acquire a lock in the body of a function, and do not release it. */
#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
@@ -57,6 +63,8 @@
#define GUARDED_BY(x)
#define PT_GUARDED_BY(x)
#define LOCKABLE
+#define LOCKS_EXCLUDED(...)
+#define LOCK_RETURNED(x)
#define EXCLUSIVE_LOCK_FUNCTION(...)
#define UNLOCK_FUNCTION(...)
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 1e23faa364b1..5152fd5a6ead 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -17,6 +17,7 @@
#include "strbuf.h"
#include "debug.h"
#include <api/fs/tracing_path.h>
+#include <api/io_dir.h>
#include <perf/cpumap.h>
#include <util/parse-events-bison.h>
#include <util/parse-events-flex.h>
@@ -554,8 +555,8 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
char *evt_path;
- struct dirent *evt_ent;
- DIR *evt_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir evt_dir;
int ret = 0, found = 0;
evt_path = get_events_file(sys_name);
@@ -563,14 +564,14 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- evt_dir = opendir(evt_path);
- if (!evt_dir) {
+ io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (evt_dir.dirfd < 0) {
put_events_file(evt_path);
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- while (!ret && (evt_ent = readdir(evt_dir))) {
+ while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) {
if (!strcmp(evt_ent->d_name, ".")
|| !strcmp(evt_ent->d_name, "..")
|| !strcmp(evt_ent->d_name, "enable")
@@ -592,7 +593,7 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
}
put_events_file(evt_path);
- closedir(evt_dir);
+ close(evt_dir.dirfd);
return ret;
}
@@ -615,17 +616,23 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
struct parse_events_error *err,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
- struct dirent *events_ent;
- DIR *events_dir;
+ struct io_dirent64 *events_ent;
+ struct io_dir events_dir;
int ret = 0;
+ char *events_dir_path = get_tracing_file("events");
- events_dir = tracing_events__opendir();
- if (!events_dir) {
+ if (!events_dir_path) {
+ tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
+ return -1;
+ }
+ io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ put_events_file(events_dir_path);
+ if (events_dir.dirfd < 0) {
tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
return -1;
}
- while (!ret && (events_ent = readdir(events_dir))) {
+ while (!ret && (events_ent = io_dir__readdir(&events_dir))) {
if (!strcmp(events_ent->d_name, ".")
|| !strcmp(events_ent->d_name, "..")
|| !strcmp(events_ent->d_name, "enable")
@@ -639,8 +646,7 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
ret = add_tracepoint_event(parse_state, list, events_ent->d_name,
evt_name, err, head_config, loc);
}
-
- closedir(events_dir);
+ close(events_dir.dirfd);
return ret;
}
@@ -1660,7 +1666,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
if (!parse_events__filter_pmu(parse_state, pmu) &&
- perf_pmu__match(pmu, event_or_pmu)) {
+ perf_pmu__wildcard_match(pmu, event_or_pmu)) {
bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
if (!parse_events_add_pmu(parse_state, *listp, pmu,
@@ -1974,48 +1980,55 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
int *force_grouped_idx = _fg_idx;
int lhs_sort_idx, rhs_sort_idx, ret;
const char *lhs_pmu_name, *rhs_pmu_name;
- bool lhs_has_group, rhs_has_group;
/*
- * First sort by grouping/leader. Read the leader idx only if the evsel
- * is part of a group, by default ungrouped events will be sorted
- * relative to grouped events based on where the first ungrouped event
- * occurs. If both events don't have a group we want to fall-through to
- * the arch specific sorting, that can reorder and fix things like
- * Intel's topdown events.
+ * Get the indexes of the 2 events to sort. If the events are
+ * in groups then the leader's index is used otherwise the
+ * event's index is used. An index may be forced for events that
+ * must be in the same group, namely Intel topdown events.
*/
- if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
- lhs_has_group = true;
- lhs_sort_idx = lhs_core->leader->idx;
+ if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
+ lhs_sort_idx = *force_grouped_idx;
} else {
- lhs_has_group = false;
- lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
- ? *force_grouped_idx
- : lhs_core->idx;
- }
- if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
- rhs_has_group = true;
- rhs_sort_idx = rhs_core->leader->idx;
+ bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
+
+ lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx;
+ }
+ if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
+ rhs_sort_idx = *force_grouped_idx;
} else {
- rhs_has_group = false;
- rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
- ? *force_grouped_idx
- : rhs_core->idx;
+ bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
+
+ rhs_sort_idx = rhs_has_group ? rhs_core->leader->idx : rhs_core->idx;
}
+ /* If the indices differ then respect the insertion order. */
if (lhs_sort_idx != rhs_sort_idx)
return lhs_sort_idx - rhs_sort_idx;
- /* Group by PMU if there is a group. Groups can't span PMUs. */
- if (lhs_has_group && rhs_has_group) {
- lhs_pmu_name = lhs->group_pmu_name;
- rhs_pmu_name = rhs->group_pmu_name;
- ret = strcmp(lhs_pmu_name, rhs_pmu_name);
- if (ret)
- return ret;
- }
+ /*
+ * Ignoring forcing, lhs_sort_idx == rhs_sort_idx so lhs and rhs should
+ * be in the same group. Events in the same group need to be ordered by
+ * their grouping PMU name as the group will be broken to ensure only
+ * events on the same PMU are programmed together.
+ *
+ * With forcing the lhs_sort_idx == rhs_sort_idx shows that one or both
+ * events are being forced to be at force_grouped_idx. If only one event
+ * is being forced then the other event is the group leader of the group
+ * we're trying to force the event into. Ensure for the force grouped
+ * case that the PMU name ordering is also respected.
+ */
+ lhs_pmu_name = lhs->group_pmu_name;
+ rhs_pmu_name = rhs->group_pmu_name;
+ ret = strcmp(lhs_pmu_name, rhs_pmu_name);
+ if (ret)
+ return ret;
- /* Architecture specific sorting. */
+ /*
+ * Architecture specific sorting, by default sort events in the same
+ * group with the same PMU by their insertion index. On Intel topdown
+ * constraints must be adhered to - slots first, etc.
+ */
return arch_evlist__cmp(lhs, rhs);
}
@@ -2024,9 +2037,11 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
int idx = 0, force_grouped_idx = -1;
struct evsel *pos, *cur_leader = NULL;
struct perf_evsel *cur_leaders_grp = NULL;
- bool idx_changed = false, cur_leader_force_grouped = false;
+ bool idx_changed = false;
int orig_num_leaders = 0, num_leaders = 0;
int ret;
+ struct evsel *force_grouped_leader = NULL;
+ bool last_event_was_forced_leader = false;
/*
* Compute index to insert ungrouped events at. Place them where the
@@ -2049,10 +2064,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
pos->core.idx = idx++;
- /* Remember an index to sort all forced grouped events together to. */
- if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
- arch_evsel__must_be_in_group(pos))
- force_grouped_idx = pos->core.idx;
+ /*
+ * Remember an index to sort all forced grouped events
+ * together to. Use the group leader as some events
+ * must appear first within the group.
+ */
+ if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos))
+ force_grouped_idx = pos_leader->core.idx;
}
/* Sort events. */
@@ -2080,31 +2098,66 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
* Set the group leader respecting the given groupings and that
* groups can't span PMUs.
*/
- if (!cur_leader)
+ if (!cur_leader) {
cur_leader = pos;
+ cur_leaders_grp = &pos->core;
+ if (pos_force_grouped)
+ force_grouped_leader = pos;
+ }
cur_leader_pmu_name = cur_leader->group_pmu_name;
- if ((cur_leaders_grp != pos->core.leader &&
- (!pos_force_grouped || !cur_leader_force_grouped)) ||
- strcmp(cur_leader_pmu_name, pos_pmu_name)) {
- /* Event is for a different group/PMU than last. */
+ if (strcmp(cur_leader_pmu_name, pos_pmu_name)) {
+ /* PMU changed so the group/leader must change. */
cur_leader = pos;
- /*
- * Remember the leader's group before it is overwritten,
- * so that later events match as being in the same
- * group.
- */
cur_leaders_grp = pos->core.leader;
+ if (pos_force_grouped && force_grouped_leader == NULL)
+ force_grouped_leader = pos;
+ } else if (cur_leaders_grp != pos->core.leader) {
+ bool split_even_if_last_leader_was_forced = true;
+
/*
- * Avoid forcing events into groups with events that
- * don't need to be in the group.
+ * Event is for a different group. If the last event was
+ * the forced group leader then subsequent group events
+ * and forced events should be in the same group. If
+ * there are no other forced group events then the
+ * forced group leader wasn't really being forced into a
+ * group, it just set arch_evsel__must_be_in_group, and
+ * we don't want the group to split here.
*/
- cur_leader_force_grouped = pos_force_grouped;
+ if (force_grouped_idx != -1 && last_event_was_forced_leader) {
+ struct evsel *pos2 = pos;
+ /*
+ * Search the whole list as the group leaders
+ * aren't currently valid.
+ */
+ list_for_each_entry_continue(pos2, list, core.node) {
+ if (pos->core.leader == pos2->core.leader &&
+ arch_evsel__must_be_in_group(pos2)) {
+ split_even_if_last_leader_was_forced = false;
+ break;
+ }
+ }
+ }
+ if (!last_event_was_forced_leader || split_even_if_last_leader_was_forced) {
+ if (pos_force_grouped) {
+ if (force_grouped_leader) {
+ cur_leader = force_grouped_leader;
+ cur_leaders_grp = force_grouped_leader->core.leader;
+ } else {
+ cur_leader = force_grouped_leader = pos;
+ cur_leaders_grp = &pos->core;
+ }
+ } else {
+ cur_leader = pos;
+ cur_leaders_grp = pos->core.leader;
+ }
+ }
}
if (pos_leader != cur_leader) {
/* The leader changed so update it. */
evsel__set_leader(pos, cur_leader);
}
+ last_event_was_forced_leader = (force_grouped_leader == pos);
}
list_for_each_entry(pos, list, core.node) {
struct evsel *pos_leader = evsel__leader(pos);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index bf7f73548605..7ed86e3e34e3 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,21 +53,25 @@ static int str(yyscan_t scanner, int token)
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
- if (text[0] != '\'') {
- yylval->str = strdup(text);
- } else {
- /*
- * If a text tag specified on the command line
- * contains opening single quite ' then it is
- * expected that the tag ends with single quote
- * as well, like this:
- * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
- * quotes need to be escaped to bypass shell
- * processing.
- */
- yylval->str = strndup(&text[1], strlen(text) - 2);
- }
+ yylval->str = strdup(text);
+ return token;
+}
+
+static int quoted_str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+ /*
+ * If a text tag specified on the command line
+ * contains opening single quote ' then it is
+ * expected that the tag ends with single quote
+ * as well, like this:
+ * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+ * quotes need to be escaped to bypass shell
+ * processing.
+ */
+ yylval->str = strndup(&text[1], strlen(text) - 2);
return token;
}
@@ -235,9 +239,16 @@ event [^,{}/]+
num_dec [0-9]+
num_hex 0x[a-fA-F0-9]{1,16}
num_raw_hex [a-fA-F0-9]{1,16}
-name [a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]*
-name_tag [\'][a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
-name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+/* Regular pattern to match the token PE_NAME. */
+name_start [a-zA-Z0-9_*?\[\]]
+name {name_start}[a-zA-Z0-9_*?.\[\]!\-]*
+/* PE_NAME token when inside a config term list, allows ':'. */
+term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]*
+/*
+ * PE_NAME token when quoted, allows ':,.='.
+ * Matches the RHS of terms like: name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'.
+ */
+quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\']
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/*
* If you add a modifier you need to update check_modifier().
@@ -341,7 +352,9 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
{lc_type} { return lc_str(yyscanner, _parse_state); }
{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
-{name_minus} { return str(yyscanner, PE_NAME); }
+{num_dec} { return value(_parse_state, yyscanner, 10); }
+{num_hex} { return value(_parse_state, yyscanner, 16); }
+{term_name} { return str(yyscanner, PE_NAME); }
@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
}
@@ -410,7 +423,7 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); }
{modifier_event} { return modifiers(_parse_state, yyscanner); }
{name} { return str(yyscanner, PE_NAME); }
-{name_tag} { return str(yyscanner, PE_NAME); }
+{quoted_name} { return quoted_str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
, { BEGIN(event); return ','; }
: { return ':'; }
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index c7f3543b9921..66b666d9ce64 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type)
{
- if (pmu)
- return pmu->name;
-
switch (type) {
ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
- ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
+ case PERF_TYPE_RAW:
+ return pmu ? pmu->name : "PERF_TYPE_RAW";
default:
- return NULL;
+ return pmu ? pmu->name : NULL;
}
}
static const char *stringify_perf_hw_id(u64 value)
{
- switch (value) {
+ switch (value & PERF_HW_EVENT_MASK) {
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
@@ -169,79 +167,100 @@ static const char *stringify_perf_sw_id(u64 value)
}
#undef ENUM_ID_TO_STR_CASE
-#define PRINT_ID(_s, _f) \
-do { \
- const char *__s = _s; \
- if (__s == NULL) \
- snprintf(buf, size, _f, value); \
- else \
- snprintf(buf, size, _f" (%s)", value, __s); \
-} while (0)
-#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64)
-#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64)
+static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s)
+{
+ if (s == NULL)
+ snprintf(buf, size, "%"PRIu64, value);
+ else
+ snprintf(buf, size, "%"PRIu64" (%s)", value, s);
+}
+
+static void print_id_hex(char *buf, size_t size, u64 value, const char *s)
+{
+ if (s == NULL)
+ snprintf(buf, size, "%#"PRIx64, value);
+ else
+ snprintf(buf, size, "%#"PRIx64" (%s)", value, s);
+}
-static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value)
+static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type)
{
- print_id_unsigned(stringify_perf_type_id(pmu, value));
+ print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type));
}
-static void __p_config_hw_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
{
- print_id_hex(stringify_perf_hw_id(value));
+ const char *name = stringify_perf_hw_id(config);
+
+ if (name == NULL) {
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64, config);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+ config);
+ }
+ } else {
+ if (pmu == NULL)
+ snprintf(buf, size, "%#"PRIx64" (%s)", config, name);
+ else
+ snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name);
+ }
}
-static void __p_config_sw_id(char *buf, size_t size, u64 value)
+static void __p_config_sw_id(char *buf, size_t size, u64 id)
{
- print_id_hex(stringify_perf_sw_id(value));
+ print_id_hex(buf, size, id, stringify_perf_sw_id(id));
}
-static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
{
- const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff);
+ const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff);
const char *hw_cache_op_str =
- stringify_perf_hw_cache_op_id((value & 0xff00) >> 8);
+ stringify_perf_hw_cache_op_id((config & 0xff00) >> 8);
const char *hw_cache_op_result_str =
- stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16);
-
- if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
- hw_cache_op_result_str == NULL) {
- snprintf(buf, size, "%#"PRIx64, value);
+ stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16);
+
+ if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) {
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64, config);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+ config);
+ }
} else {
- snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value,
- hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ if (pmu == NULL) {
+ snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ }
}
}
-static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
+static void __p_config_tracepoint_id(char *buf, size_t size, u64 id)
{
- char *str = tracepoint_id_to_name(value);
+ char *str = tracepoint_id_to_name(id);
- print_id_hex(str);
+ print_id_hex(buf, size, id, str);
free(str);
}
-static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value)
+static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config)
{
- const char *name = perf_pmu__name_from_config(pmu, value);
-
- if (name) {
- print_id_hex(name);
- return;
- }
switch (type) {
case PERF_TYPE_HARDWARE:
- return __p_config_hw_id(buf, size, value);
+ return __p_config_hw_id(buf, size, pmu, config);
case PERF_TYPE_SOFTWARE:
- return __p_config_sw_id(buf, size, value);
+ return __p_config_sw_id(buf, size, config);
case PERF_TYPE_HW_CACHE:
- return __p_config_hw_cache_id(buf, size, value);
+ return __p_config_hw_cache_id(buf, size, pmu, config);
case PERF_TYPE_TRACEPOINT:
- return __p_config_tracepoint_id(buf, size, value);
+ return __p_config_tracepoint_id(buf, size, config);
case PERF_TYPE_RAW:
case PERF_TYPE_BREAKPOINT:
default:
- snprintf(buf, size, "%#"PRIx64, value);
- return;
+ return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config));
}
}
@@ -253,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
-#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val)
+#define p_type_id(val) __p_type_id(buf, BUF_SIZE, pmu, val)
#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val)
#define PRINT_ATTRn(_n, _f, _p, _a) \
@@ -273,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
char buf[BUF_SIZE];
int ret = 0;
+ if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) {
+ u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT;
+
+ if (extended_type)
+ pmu = perf_pmus__find_by_type(extended_type);
+ }
+
PRINT_ATTRn("type", type, p_type_id, true);
PRINT_ATTRf(size, p_unsigned);
PRINT_ATTRn("config", config, p_config_id, true);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6206c8fe2bf9..b7ebac5ab1d1 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -13,6 +13,7 @@
#include <dirent.h>
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <locale.h>
#include <fnmatch.h>
#include <math.h>
@@ -36,12 +37,12 @@
#define UNIT_MAX_LEN 31 /* max length for event unit name */
enum event_source {
- /* An event loaded from /sys/devices/<pmu>/events. */
+ /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */
EVENT_SRC_SYSFS,
/* An event loaded from a CPUID matched json file. */
EVENT_SRC_CPU_JSON,
/*
- * An event loaded from a /sys/devices/<pmu>/identifier matched json
+ * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json
* file.
*/
EVENT_SRC_SYS_JSON,
@@ -195,19 +196,17 @@ static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_fo
*/
static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load)
{
- struct dirent *evt_ent;
- DIR *format_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir format_dir;
int ret = 0;
- format_dir = fdopendir(dirfd);
- if (!format_dir)
- return -EINVAL;
+ io_dir__init(&format_dir, dirfd);
- while ((evt_ent = readdir(format_dir)) != NULL) {
+ while ((evt_ent = io_dir__readdir(&format_dir)) != NULL) {
struct perf_pmu_format *format;
char *name = evt_ent->d_name;
- if (!strcmp(name, ".") || !strcmp(name, ".."))
+ if (io_dir__is_dir(&format_dir, evt_ent))
continue;
format = perf_pmu__new_format(&pmu->format, name);
@@ -234,7 +233,7 @@ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_lo
}
}
- closedir(format_dir);
+ close(format_dir.dirfd);
return ret;
}
@@ -596,7 +595,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
};
if (pmu_events_table__find_event(pmu->events_table, pmu, name,
update_alias, &data) == 0)
- pmu->cpu_json_aliases++;
+ pmu->cpu_common_json_aliases++;
}
pmu->sysfs_aliases++;
break;
@@ -635,14 +634,12 @@ static inline bool pmu_alias_info_file(const char *name)
*/
static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
{
- struct dirent *evt_ent;
- DIR *event_dir;
+ struct io_dirent64 *evt_ent;
+ struct io_dir event_dir;
- event_dir = fdopendir(events_dir_fd);
- if (!event_dir)
- return -EINVAL;
+ io_dir__init(&event_dir, events_dir_fd);
- while ((evt_ent = readdir(event_dir))) {
+ while ((evt_ent = io_dir__readdir(&event_dir))) {
char *name = evt_ent->d_name;
int fd;
FILE *file;
@@ -674,7 +671,6 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd)
fclose(file);
}
- closedir(event_dir);
pmu->sysfs_aliases_loaded = true;
return 0;
}
@@ -783,7 +779,7 @@ static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is
}
/* Nothing found, for core PMUs assume this means all CPUs. */
- return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL;
+ return is_core ? cpu_map__online() : NULL;
}
static bool pmu_is_uncore(int dirfd, const char *name)
@@ -847,21 +843,23 @@ static size_t pmu_deduped_name_len(const struct perf_pmu *pmu, const char *name,
}
/**
- * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any
- * trailing suffix? The Suffix must be in form
- * tok_{digits}, or tok{digits}.
+ * perf_pmu__match_wildcard - Does the pmu_name start with tok and is then only
+ * followed by nothing or a suffix? tok may contain
+ * part of a suffix.
* @pmu_name: The pmu_name with possible suffix.
- * @tok: The possible match to pmu_name without suffix.
+ * @tok: The wildcard argument to match.
*/
-static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok)
+static bool perf_pmu__match_wildcard(const char *pmu_name, const char *tok)
{
const char *p, *suffix;
bool has_hex = false;
+ size_t tok_len = strlen(tok);
- if (strncmp(pmu_name, tok, strlen(tok)))
+ /* Check start of pmu_name for equality. */
+ if (strncmp(pmu_name, tok, tok_len))
return false;
- suffix = p = pmu_name + strlen(tok);
+ suffix = p = pmu_name + tok_len;
if (*p == 0)
return true;
@@ -887,60 +885,84 @@ static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *to
}
/**
- * pmu_uncore_alias_match - does name match the PMU name?
- * @pmu_name: the json struct pmu_event name. This may lack a suffix (which
+ * perf_pmu__match_ignoring_suffix_uncore - Does the pmu_name match tok ignoring
+ * any trailing suffix on pmu_name and
+ * tok? The Suffix must be in form
+ * tok_{digits}, or tok{digits}.
+ * @pmu_name: The pmu_name with possible suffix.
+ * @tok: The possible match to pmu_name.
+ */
+static bool perf_pmu__match_ignoring_suffix_uncore(const char *pmu_name, const char *tok)
+{
+	size_t pmu_name_len, tok_len;
+
+	/*
+	 * For robustness, check for NULL on both sides: two NULL names compare
+	 * equal, a single NULL is a mismatch. Neither may reach the strncmp()
+	 * calls below, which must not be handed a NULL pointer.
+	 */
+	if (pmu_name == NULL || tok == NULL)
+		return pmu_name == tok;
+
+	/* uncore_ prefixes are ignored. */
+	if (!strncmp(pmu_name, "uncore_", 7))
+		pmu_name += 7;
+	if (!strncmp(tok, "uncore_", 7))
+		tok += 7;
+
+	/* Compare only the parts in front of any trailing suffix. */
+	pmu_name_len = pmu_name_len_no_suffix(pmu_name);
+	tok_len = pmu_name_len_no_suffix(tok);
+	if (pmu_name_len != tok_len)
+		return false;
+
+	return strncmp(pmu_name, tok, pmu_name_len) == 0;
+}
+
+
+/**
+ * perf_pmu__match_wildcard_uncore - does to_match match the PMU's name?
+ * @pmu_name: The pmu->name or pmu->alias to match against.
+ * @to_match: the json struct pmu_event name. This may lack a suffix (which
* matches) or be of the form "socket,pmuname" which will match
* "socketX_pmunameY".
- * @name: a real full PMU name as from sysfs.
+ *
+ * A NULL @pmu_name never matches. A leading "uncore_" is skipped on both
+ * strings. Without a comma in @to_match the decision is left to
+ * perf_pmu__match_wildcard(). Otherwise every comma separated component must
+ * appear in order at the current position of @pmu_name, each one optionally
+ * followed by digits and a single '_', and @pmu_name must be fully consumed
+ * once the last component has been processed.
+ *
+ * Return: true on a match, false otherwise (also when strdup() fails).
*/
-static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+static bool perf_pmu__match_wildcard_uncore(const char *pmu_name, const char *to_match)
{
- char *tmp = NULL, *tok, *str;
- bool res;
-
- if (strchr(pmu_name, ',') == NULL)
- return perf_pmu__match_ignoring_suffix(name, pmu_name);
+ char *mutable_to_match, *tok, *tmp;
- str = strdup(pmu_name);
- if (!str)
+ if (!pmu_name)
return false;
- /*
- * uncore alias may be from different PMU with common prefix
- */
- tok = strtok_r(str, ",", &tmp);
- if (strncmp(pmu_name, tok, strlen(tok))) {
- res = false;
- goto out;
- }
+ /* uncore_ prefixes are ignored, on the PMU name and on the string to match. */
+ if (!strncmp(pmu_name, "uncore_", 7))
+ pmu_name += 7;
+ if (!strncmp(to_match, "uncore_", 7))
+ to_match += 7;
- /*
- * Match more complex aliases where the alias name is a comma-delimited
- * list of tokens, orderly contained in the matching PMU name.
- *
- * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we
- * match "socket" in "socketX_pmunameY" and then "pmuname" in
- * "pmunameY".
- */
- while (1) {
- char *next_tok = strtok_r(NULL, ",", &tmp);
+ if (strchr(to_match, ',') == NULL)
+ return perf_pmu__match_wildcard(pmu_name, to_match);
- name = strstr(name, tok);
- if (!name ||
- (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) {
- res = false;
- goto out;
+ /*
+  * Process comma separated list of PMU name components. strtok_r()
+  * writes into the string it splits, so a private copy is tokenized.
+  */
+ mutable_to_match = strdup(to_match);
+ if (!mutable_to_match)
+ return false;
+
+ tok = strtok_r(mutable_to_match, ",", &tmp);
+ while (tok) {
+ size_t tok_len = strlen(tok);
+
+ if (strncmp(pmu_name, tok, tok_len)) {
+ /* Mismatch between part of pmu_name and tok; drop the copy and fail. */
+ free(mutable_to_match);
+ return false;
}
- if (!next_tok)
- break;
- tok = next_tok;
- name += strlen(tok);
+ /*
+  * Move pmu_name forward over tok and suffix. The suffix skipped
+  * here is any run of digits followed by at most one '_'.
+  */
+ pmu_name += tok_len;
+ while (*pmu_name != '\0' && isdigit(*pmu_name))
+ pmu_name++;
+ if (*pmu_name == '_')
+ pmu_name++;
+
+ tok = strtok_r(NULL, ",", &tmp);
}
-
- res = true;
-out:
- free(str);
- return res;
+ free(mutable_to_match);
+ return *pmu_name == '\0'; /* Leftover characters in pmu_name mean no match. */
}
bool pmu_uncore_identifier_match(const char *compat, const char *id)
@@ -1003,11 +1025,19 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
{
struct perf_pmu *pmu = vdata;
- if (!pe->compat || !pe->pmu)
+ if (!pe->compat || !pe->pmu) {
+ /* No data to match. */
+ return 0;
+ }
+
+ if (!perf_pmu__match_wildcard_uncore(pmu->name, pe->pmu) &&
+ !perf_pmu__match_wildcard_uncore(pmu->alias_name, pe->pmu)) {
+ /* PMU name/alias_name don't match. */
return 0;
+ }
- if (pmu_uncore_alias_match(pe->pmu, pmu->name) &&
- pmu_uncore_identifier_match(pe->compat, pmu->id)) {
+ if (pmu_uncore_identifier_match(pe->compat, pmu->id)) {
+ /* Id matched. */
perf_pmu__new_alias(pmu,
pe->name,
pe->desc,
@@ -1016,7 +1046,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
pe,
EVENT_SRC_SYS_JSON);
}
-
return 0;
}
@@ -1851,9 +1880,10 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
if (pmu->cpu_aliases_added)
nr += pmu->cpu_json_aliases;
else if (pmu->events_table)
- nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases;
+ nr += pmu_events_table__num_events(pmu->events_table, pmu) -
+ pmu->cpu_common_json_aliases;
else
- assert(pmu->cpu_json_aliases == 0);
+ assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0);
if (perf_pmu__is_tool(pmu))
nr -= tool_pmu__num_skip_events();
@@ -1974,15 +2004,82 @@ out:
return ret;
}
-bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name)
+static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_match, bool wildcard)
{
- return !strcmp(pmu->name, pmu_name) ||
- (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) ||
+ const char *names[2] = { /* Both the name and the alias are tried in turn. */
+ pmu->name,
+ pmu->alias_name,
+ };
+ if (pmu->is_core) { /* Core PMU: exact name/alias or the "default_core" marker. */
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (!strcmp(name, to_match)) {
+ /* Exact name match. */
+ return true;
+ }
+ }
+ if (!strcmp(to_match, "default_core")) {
+ /*
+ * jevents and tests use default_core as a marker for any core
+ * PMU as the PMU name varies across architectures.
+ */
+ return true;
+ }
+ return false;
+ }
+ if (!pmu->is_uncore) {
/*
- * jevents and tests use default_core as a marker for any core
- * PMU as the PMU name varies across architectures.
+ * PMU isn't core or uncore, some kind of broken CPU mask
+ * situation. Only match exact name.
*/
- (pmu->is_core && !strcmp(pmu_name, "default_core"));
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *name = names[i];
+
+ if (!name)
+ continue;
+
+ if (!strcmp(name, to_match)) {
+ /* Exact name match. */
+ return true;
+ }
+ }
+ return false;
+ }
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) { /* Uncore PMU: wildcard selects the matcher. */
+ const char *name = names[i]; /* May be NULL; both matchers check for that themselves. */
+
+ if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match))
+ return true;
+ if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * perf_pmu__name_wildcard_match - Called by the jevents generated code to see
+ * if pmu matches the json to_match string.
+ * @pmu: The pmu whose name/alias to match.
+ * @to_match: The possible match to pmu_name.
+ *
+ * Thin wrapper around perf_pmu___name_match() with wildcard matching enabled.
+ * The wildcard flag only influences uncore PMUs, where it selects
+ * perf_pmu__match_wildcard_uncore(); other PMUs are compared by exact
+ * name/alias (plus the "default_core" marker for core PMUs).
+ *
+ * Return: true if the PMU's name or alias matches @to_match.
+ */
+bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match)
+{
+ return perf_pmu___name_match(pmu, to_match, /*wildcard=*/true);
+}
+
+/**
+ * perf_pmu__name_no_suffix_match - Does pmu's name match to_match ignoring any
+ * trailing suffix on the pmu_name and/or tok?
+ * @pmu: The pmu whose name/alias to match.
+ * @to_match: The possible match to pmu_name.
+ *
+ * Thin wrapper around perf_pmu___name_match() with wildcard matching disabled.
+ * For uncore PMUs this selects perf_pmu__match_ignoring_suffix_uncore(); other
+ * PMUs are compared by exact name/alias (plus the "default_core" marker for
+ * core PMUs).
+ *
+ * Return: true if the PMU's name or alias matches @to_match.
+ */
+bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match)
+{
+ return perf_pmu___name_match(pmu, to_match, /*wildcard=*/false);
}
bool perf_pmu__is_software(const struct perf_pmu *pmu)
@@ -2121,10 +2218,9 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu)
*/
int perf_pmu__caps_parse(struct perf_pmu *pmu)
{
- struct stat st;
char caps_path[PATH_MAX];
- DIR *caps_dir;
- struct dirent *evt_ent;
+ struct io_dir caps_dir;
+ struct io_dirent64 *evt_ent;
int caps_fd;
if (pmu->caps_initialized)
@@ -2135,24 +2231,21 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps"))
return -1;
- if (stat(caps_path, &st) < 0) {
+ caps_fd = open(caps_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
+ if (caps_fd == -1) {
pmu->caps_initialized = true;
return 0; /* no error if caps does not exist */
}
- caps_dir = opendir(caps_path);
- if (!caps_dir)
- return -EINVAL;
+ io_dir__init(&caps_dir, caps_fd);
- caps_fd = dirfd(caps_dir);
-
- while ((evt_ent = readdir(caps_dir)) != NULL) {
+ while ((evt_ent = io_dir__readdir(&caps_dir)) != NULL) {
char *name = evt_ent->d_name;
char value[128];
FILE *file;
int fd;
- if (!strcmp(name, ".") || !strcmp(name, ".."))
+ if (io_dir__is_dir(&caps_dir, evt_ent))
continue;
fd = openat(caps_fd, name, O_RDONLY);
@@ -2174,7 +2267,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
fclose(file);
}
- closedir(caps_dir);
+ close(caps_fd);
pmu->caps_initialized = true;
return pmu->nr_caps;
@@ -2229,29 +2322,31 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
name ?: "N/A", buf, config_name, config);
}
-bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok)
+bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match)
{
- const char *name = pmu->name;
- bool need_fnmatch = strisglob(tok);
+ const char *names[2] = { /* The name and, when set, the alias are both candidates. */
+ pmu->name,
+ pmu->alias_name,
+ };
+ bool need_fnmatch = strisglob(wildcard_to_match); /* fnmatch() is only tried when strisglob() says so. */
- if (!strncmp(tok, "uncore_", 7))
- tok += 7;
- if (!strncmp(name, "uncore_", 7))
- name += 7;
+ if (!strncmp(wildcard_to_match, "uncore_", 7)) /* "uncore_" prefix is ignored on the pattern ... */
+ wildcard_to_match += 7;
- if (perf_pmu__match_ignoring_suffix(name, tok) ||
- (need_fnmatch && !fnmatch(tok, name, 0)))
- return true;
+ for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+ const char *pmu_name = names[i];
- name = pmu->alias_name;
- if (!name)
- return false;
+ if (!pmu_name)
+ continue;
- if (!strncmp(name, "uncore_", 7))
- name += 7;
+ if (!strncmp(pmu_name, "uncore_", 7)) /* ... and on each candidate name. */
+ pmu_name += 7;
- return perf_pmu__match_ignoring_suffix(name, tok) ||
- (need_fnmatch && !fnmatch(tok, name, 0));
+ if (perf_pmu__match_wildcard(pmu_name, wildcard_to_match) ||
+ (need_fnmatch && !fnmatch(wildcard_to_match, pmu_name, 0)))
+ return true; /* First candidate that matches either way wins. */
+ }
+ return false;
}
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size)
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index dbed6c243a5e..b93014cc3670 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -37,6 +37,8 @@ struct perf_pmu_caps {
};
enum {
+ PERF_PMU_TYPE_PE_START = 0,
+ PERF_PMU_TYPE_PE_END = 0xFFFEFFFF,
PERF_PMU_TYPE_HWMON_START = 0xFFFF0000,
PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD,
PERF_PMU_TYPE_TOOL = 0xFFFFFFFE,
@@ -134,6 +136,11 @@ struct perf_pmu {
uint32_t cpu_json_aliases;
/** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */
uint32_t sys_json_aliases;
+ /**
+ * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when
+ * loading all sysfs events.
+ */
+ uint32_t cpu_common_json_aliases;
/** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */
bool sysfs_aliases_loaded;
/**
@@ -238,7 +245,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name);
size_t perf_pmu__num_events(struct perf_pmu *pmu);
int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
void *state, pmu_event_callback cb);
-bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
+bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match);
+bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match);
/**
* perf_pmu_is_software - is the PMU a software PMU as in it uses the
@@ -273,7 +281,7 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
const char *config_name);
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
-bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok);
+bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match);
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
int perf_pmu__pathname_scnprintf(char *buf, size_t size,
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index b493da0d22ef..b99292de7669 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -3,10 +3,10 @@
#include <linux/list_sort.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <api/io_dir.h>
#include <subcmd/pager.h>
#include <sys/types.h>
#include <ctype.h>
-#include <dirent.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
@@ -37,10 +37,25 @@
*/
static LIST_HEAD(core_pmus);
static LIST_HEAD(other_pmus);
-static bool read_sysfs_core_pmus;
-static bool read_sysfs_all_pmus;
+/*
+ * Kinds of PMU that pmu_read_sysfs() can be asked to load. Each value is a bit
+ * position in the read_pmu_types bitmask, see the *_MASK macros below.
+ */
+enum perf_tool_pmu_type {
+	PERF_TOOL_PMU_TYPE_PE_CORE,
+	PERF_TOOL_PMU_TYPE_PE_OTHER,
+	PERF_TOOL_PMU_TYPE_TOOL,
+	PERF_TOOL_PMU_TYPE_HWMON,
+};
+
+/* One bit per enum perf_tool_pmu_type value. */
+#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE)
+#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER)
+#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL)
+#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON)
+
+/* Every PMU kind at once. */
+#define PERF_TOOL_PMU_TYPE_ALL_MASK \
+	(PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \
+	 PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \
+	 PERF_TOOL_PMU_TYPE_TOOL_MASK | \
+	 PERF_TOOL_PMU_TYPE_HWMON_MASK)
+static unsigned int read_pmu_types;
-static void pmu_read_sysfs(bool core_only);
+static void pmu_read_sysfs(unsigned int to_read_pmus);
size_t pmu_name_len_no_suffix(const char *str)
{
@@ -102,8 +117,7 @@ void perf_pmus__destroy(void)
perf_pmu__delete(pmu);
}
- read_sysfs_core_pmus = false;
- read_sysfs_all_pmus = false;
+ read_pmu_types = 0;
}
static struct perf_pmu *pmu_find(const char *name)
@@ -129,6 +143,7 @@ struct perf_pmu *perf_pmus__find(const char *name)
struct perf_pmu *pmu;
int dirfd;
bool core_pmu;
+ unsigned int to_read_pmus = 0;
/*
* Once PMU is loaded it stays in the list,
@@ -139,11 +154,11 @@ struct perf_pmu *perf_pmus__find(const char *name)
if (pmu)
return pmu;
- if (read_sysfs_all_pmus)
+ if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
return NULL;
core_pmu = is_pmu_core(name);
- if (core_pmu && read_sysfs_core_pmus)
+ if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
return NULL;
dirfd = perf_pmu__event_source_devices_fd();
@@ -151,15 +166,27 @@ struct perf_pmu *perf_pmus__find(const char *name)
/*eager_load=*/false);
close(dirfd);
- if (!pmu) {
- /*
- * Looking up an inidividual PMU failed. This may mean name is
- * an alias, so read the PMUs from sysfs and try to find again.
- */
- pmu_read_sysfs(core_pmu);
+ if (pmu)
+ return pmu;
+
+ /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */
+ if (!strncmp(name, "hwmon_", 6))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ else if (!strcmp(name, "tool"))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+ if (to_read_pmus) {
+ pmu_read_sysfs(to_read_pmus);
pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
}
- return pmu;
+ /* Read all necessary PMUs from sysfs and see if the PMU is found. */
+ to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK;
+ if (!core_pmu)
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ pmu_read_sysfs(to_read_pmus);
+ return pmu_find(name);
}
static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
@@ -176,11 +203,11 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
if (pmu)
return pmu;
- if (read_sysfs_all_pmus)
+ if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
return NULL;
core_pmu = is_pmu_core(name);
- if (core_pmu && read_sysfs_core_pmus)
+ if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
return NULL;
return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name,
@@ -197,52 +224,57 @@ static int pmus_cmp(void *priv __maybe_unused,
}
/* Add all pmus in sysfs to pmu list: */
-static void pmu_read_sysfs(bool core_only)
+static void pmu_read_sysfs(unsigned int to_read_types)
{
- int fd;
- DIR *dir;
- struct dirent *dent;
struct perf_pmu *tool_pmu;
- if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
+ if ((read_pmu_types & to_read_types) == to_read_types) {
+ /* All requested PMU types have been read, nothing left to do. */
return;
+ }
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return;
+ if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
+ bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0; /* No "other" PMUs requested. */
- dir = fdopendir(fd);
- if (!dir) {
- close(fd);
- return;
- }
+ if (fd < 0)
+ goto skip_pe_pmus; /* Directory unavailable: the tool/hwmon handling below still runs. */
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- if (core_only && !is_pmu_core(dent->d_name))
- continue;
- /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
- perf_pmu__find2(fd, dent->d_name);
- }
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (core_only && !is_pmu_core(dent->d_name))
+ continue;
+ /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
+ perf_pmu__find2(fd, dent->d_name);
+ }
- closedir(dir);
- if (list_empty(&core_pmus)) {
+ close(fd);
+ }
+skip_pe_pmus:
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) {
if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
pr_err("Failure to set up any core PMUs\n");
}
list_sort(NULL, &core_pmus, pmus_cmp);
- if (!core_only) {
- tool_pmu = perf_pmus__tool_pmu();
- list_add_tail(&tool_pmu->list, &other_pmus);
- perf_pmus__read_hwmon_pmus(&other_pmus);
+
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) { /* Requested and not added by an earlier call. */
+ tool_pmu = tool_pmu__new();
+ if (tool_pmu)
+ list_add_tail(&tool_pmu->list, &other_pmus);
}
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0) /* Same once-only rule for hwmon PMUs. */
+ perf_pmus__read_hwmon_pmus(&other_pmus);
+
list_sort(NULL, &other_pmus, pmus_cmp);
- if (!list_empty(&core_pmus)) {
- read_sysfs_core_pmus = true;
- if (!core_only)
- read_sysfs_all_pmus = true;
- }
+
+ read_pmu_types |= to_read_types; /* Recorded unconditionally, even if the directory could not be opened. */
}
static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
@@ -263,12 +295,21 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
{
+ unsigned int to_read_pmus; /* PERF_TOOL_PMU_TYPE_*_MASK bits to load when the first lookup misses. */
struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
- if (pmu || read_sysfs_all_pmus)
+ if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)) /* Found, or everything is loaded already. */
return pmu;
- pmu_read_sysfs(/*core_only=*/false);
+ if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) { /* Range selects what to load. */
+ to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ } else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ } else { /* Any type outside both ranges is treated as the tool PMU. */
+ to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK;
+ }
+ pmu_read_sysfs(to_read_pmus);
pmu = __perf_pmus__find_by_type(type);
return pmu;
}
@@ -282,7 +323,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
bool use_core_pmus = !pmu || pmu->is_core;
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/false);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
pmu = list_prepare_entry(pmu, &core_pmus, list);
}
if (use_core_pmus) {
@@ -300,7 +341,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
{
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/true);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK);
return list_first_entry_or_null(&core_pmus, typeof(*pmu), list);
}
list_for_each_entry_continue(pmu, &core_pmus, list)
@@ -316,7 +357,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
if (!pmu) {
- pmu_read_sysfs(/*core_only=*/false);
+ pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
pmu = list_prepare_entry(pmu, &core_pmus, list);
} else
last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
@@ -674,47 +715,28 @@ bool perf_pmus__supports_extended_type(void)
return perf_pmus__do_support_extended_type;
}
-char *perf_pmus__default_pmu_name(void)
-{
- int fd;
- DIR *dir;
- struct dirent *dent;
- char *result = NULL;
-
- if (!list_empty(&core_pmus))
- return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name);
-
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return strdup("cpu");
-
- dir = fdopendir(fd);
- if (!dir) {
- close(fd);
- return strdup("cpu");
- }
-
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- if (is_pmu_core(dent->d_name)) {
- result = strdup(dent->d_name);
- break;
- }
- }
-
- closedir(dir);
- return result ?: strdup("cpu");
-}
-
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
{
struct perf_pmu *pmu = evsel->pmu;
+ bool legacy_core_type; /* attr.type is PERF_TYPE_HARDWARE or PERF_TYPE_HW_CACHE. */
- if (!pmu) {
- pmu = perf_pmus__find_by_type(evsel->core.attr.type);
- ((struct evsel *)evsel)->pmu = pmu;
+ if (pmu) /* Already resolved and cached on the evsel. */
+ return pmu;
+
+ pmu = perf_pmus__find_by_type(evsel->core.attr.type);
+ legacy_core_type =
+ evsel->core.attr.type == PERF_TYPE_HARDWARE ||
+ evsel->core.attr.type == PERF_TYPE_HW_CACHE;
+ if (!pmu && legacy_core_type) { /* Fallback used only when the direct lookup by attr.type failed. */
+ if (perf_pmus__supports_extended_type()) {
+ u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; /* PMU type taken from the upper config bits. */
+
+ pmu = perf_pmus__find_by_type(type);
+ } else {
+ pmu = perf_pmus__find_core_pmu();
+ }
}
+ ((struct evsel *)evsel)->pmu = pmu; /* Cache the result (possibly NULL); the cast drops const to do so. */
return pmu;
}
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index a0cb0eb2ff97..8def20e615ad 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -27,7 +27,6 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi
bool perf_pmus__have_event(const char *pname, const char *name);
int perf_pmus__num_core_pmus(void);
bool perf_pmus__supports_extended_type(void);
-char *perf_pmus__default_pmu_name(void);
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name);
struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1e769b68da37..3cc7c40f5097 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -973,6 +973,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
(unsigned long)dwarf_dieoffset(sp_die));
pf->fname = fname;
+ pf->abstrace_dieoffset = dwarf_dieoffset(sp_die);
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
pf->lno += pp->line;
@@ -1179,6 +1180,8 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
struct local_vars_finder *vf = data;
struct probe_finder *pf = vf->pf;
int tag;
+ Dwarf_Attribute attr;
+ Dwarf_Die var_die;
tag = dwarf_tag(die_mem);
if (tag == DW_TAG_formal_parameter ||
@@ -1196,10 +1199,22 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
}
}
- if (dwarf_haspc(die_mem, vf->pf->addr))
+ if (dwarf_haspc(die_mem, vf->pf->addr)) {
+ /*
+ * when DW_AT_entry_pc contains instruction address,
+ * also check if the DW_AT_abstract_origin of die_mem
+ * points to correct die.
+ */
+ if (dwarf_attr(die_mem, DW_AT_abstract_origin, &attr)) {
+ dwarf_formref_die(&attr, &var_die);
+ if (pf->abstrace_dieoffset != dwarf_dieoffset(&var_die))
+ goto out;
+ }
return DIE_FIND_CB_CONTINUE;
- else
- return DIE_FIND_CB_SIBLING;
+ }
+
+out:
+ return DIE_FIND_CB_SIBLING;
}
static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index dcf6cc1e1cbe..ecd6d937c592 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -63,6 +63,7 @@ struct probe_finder {
const char *fname; /* Real file name */
Dwarf_Die cu_die; /* Current CU */
Dwarf_Die sp_die;
+ Dwarf_Off abstrace_dieoffset;
struct intlist *lcache; /* Line cache for lazy match */
/* For variable searching */
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index a1d1e4ef6257..141ffa129c69 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -63,20 +63,6 @@ void pstack__push(struct pstack *pstack, void *key)
pstack->entries[pstack->top++] = key;
}
-void *pstack__pop(struct pstack *pstack)
-{
- void *ret;
-
- if (pstack->top == 0) {
- pr_err("%s: underflow!\n", __func__);
- return NULL;
- }
-
- ret = pstack->entries[--pstack->top];
- pstack->entries[pstack->top] = NULL;
- return ret;
-}
-
void *pstack__peek(struct pstack *pstack)
{
if (pstack->top == 0)
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index 8729b8be061d..712051b8130f 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -10,7 +10,6 @@ void pstack__delete(struct pstack *pstack);
bool pstack__empty(const struct pstack *pstack);
void pstack__remove(struct pstack *pstack, void *key);
void pstack__push(struct pstack *pstack, void *key);
-void *pstack__pop(struct pstack *pstack);
void *pstack__peek(struct pstack *pstack);
#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index b4bc57859f73..f3c05da25b4a 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -9,10 +9,12 @@
#include <event-parse.h>
#endif
#include <perf/mmap.h>
+#include "callchain.h"
#include "evlist.h"
#include "evsel.h"
#include "event.h"
#include "print_binary.h"
+#include "record.h"
#include "strbuf.h"
#include "thread_map.h"
#include "trace-event.h"
@@ -20,13 +22,6 @@
#include "util/sample.h"
#include <internal/lib.h>
-#define _PyUnicode_FromString(arg) \
- PyUnicode_FromString(arg)
-#define _PyUnicode_FromFormat(...) \
- PyUnicode_FromFormat(__VA_ARGS__)
-#define _PyLong_FromLong(arg) \
- PyLong_FromLong(arg)
-
PyMODINIT_FUNC PyInit_perf(void);
#define member_def(type, member, ptype, help) \
@@ -47,7 +42,7 @@ struct pyrf_event {
};
#define sample_members \
- sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
+ sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \
sample_member_def(sample_pid, pid, T_INT, "event pid"), \
sample_member_def(sample_tid, tid, T_INT, "event tid"), \
sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
@@ -270,6 +265,12 @@ static PyMemberDef pyrf_sample_event__members[] = {
{ .name = NULL, },
};
+/*
+ * Destructor for sample event objects: tears down the embedded perf_sample and
+ * then hands the object to its type's tp_free.
+ */
+static void pyrf_sample_event__delete(struct pyrf_event *pevent)
+{
+	PyObject *self = (PyObject *)pevent;
+
+	/* The sample must be torn down before the object is given to tp_free. */
+	perf_sample__exit(&pevent->sample);
+	Py_TYPE(self)->tp_free(self);
+}
+
static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent)
{
PyObject *ret;
@@ -336,23 +337,14 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
{
const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
struct evsel *evsel = pevent->evsel;
+ struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *field;
- if (!evsel->tp_format) {
- struct tep_event *tp_format;
-
- tp_format = trace_event__tp_format_id(evsel->core.attr.config);
- if (IS_ERR_OR_NULL(tp_format))
- return NULL;
-
- evsel->tp_format = tp_format;
- }
-
- field = tep_find_any_field(evsel->tp_format, str);
- if (!field)
+ if (IS_ERR_OR_NULL(tp_format))
return NULL;
- return tracepoint_field(pevent, field);
+ field = tep_find_any_field(tp_format, str);
+ return field ? tracepoint_field(pevent, field) : NULL;
}
#endif /* HAVE_LIBTRACEEVENT */
@@ -428,6 +420,9 @@ static int pyrf_event__setup_types(void)
pyrf_sample_event__type.tp_new =
pyrf_context_switch_event__type.tp_new =
pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+
+ pyrf_sample_event__type.tp_dealloc = (destructor)pyrf_sample_event__delete,
+
err = PyType_Ready(&pyrf_mmap_event__type);
if (err < 0)
goto out;
@@ -481,6 +476,11 @@ static PyObject *pyrf_event__new(const union perf_event *event)
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
return NULL;
+ // FIXME this better be dynamic or we need to parse everything
+ // before calling perf_mmap__consume(), including tracepoint fields.
+ if (sizeof(pevent->event) < event->header.size)
+ return NULL;
+
ptype = pyrf_event__type[event->header.type];
pevent = PyObject_New(struct pyrf_event, ptype);
if (pevent != NULL)
@@ -802,6 +802,28 @@ static PyMethodDef pyrf_evsel__methods[] = {
{ .ml_name = NULL, }
};
+/* PyMemberDef entry exposing a field of the embedded struct evsel. */
+#define evsel_member_def(member, ptype, help) \
+	{ .name = #member, \
+	  .type = ptype, \
+	  .offset = offsetof(struct pyrf_evsel, evsel.member), \
+	  .flags = 0, \
+	  .doc = help }
+
+/* PyMemberDef entry exposing a field of the evsel's perf_event_attr (evsel.core.attr). */
+#define evsel_attr_member_def(member, ptype, help) \
+	{ .name = #member, \
+	  .type = ptype, \
+	  .offset = offsetof(struct pyrf_evsel, evsel.core.attr.member), \
+	  .flags = 0, \
+	  .doc = help }
+
+/* Attributes of evsel objects; the table ends with a NULL-named sentinel. */
+static PyMemberDef pyrf_evsel__members[] = {
+	evsel_member_def(tracking, T_BOOL, "tracking event."),
+	evsel_attr_member_def(type, T_UINT, "attribute type."),
+	evsel_attr_member_def(size, T_UINT, "attribute size."),
+	evsel_attr_member_def(config, T_ULONGLONG, "attribute config."),
+	evsel_attr_member_def(sample_period, T_ULONGLONG, "attribute sample_period."),
+	evsel_attr_member_def(sample_type, T_ULONGLONG, "attribute sample_type."),
+	evsel_attr_member_def(read_format, T_ULONGLONG, "attribute read_format."),
+	evsel_attr_member_def(wakeup_events, T_UINT, "attribute wakeup_events."),
+	{ .name = NULL, },
+};
+
+
static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
static PyTypeObject pyrf_evsel__type = {
@@ -811,6 +833,7 @@ static PyTypeObject pyrf_evsel__type = {
.tp_dealloc = (destructor)pyrf_evsel__delete,
.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
.tp_doc = pyrf_evsel__doc,
+ .tp_members = pyrf_evsel__members,
.tp_methods = pyrf_evsel__methods,
.tp_init = (initproc)pyrf_evsel__init,
.tp_str = pyrf_evsel__str,
@@ -851,6 +874,16 @@ static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
}
+/*
+ * evlist.all_cpus(): return a new pyrf_cpu_map object whose 'cpus' is a
+ * perf_cpu_map__get() reference to the evlist's core.all_cpus map.
+ * Returns NULL if the Python object cannot be allocated.
+ */
+static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist)
+{
+	struct pyrf_cpu_map *result;
+
+	result = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type);
+	if (result == NULL)
+		return NULL;
+
+	result->cpus = perf_cpu_map__get(pevlist->evlist.core.all_cpus);
+	return (PyObject *)result;
+}
+
static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
PyObject *args, PyObject *kwargs)
{
@@ -984,20 +1017,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
evsel = evlist__event2evsel(evlist, event);
if (!evsel) {
+ Py_DECREF(pyevent);
Py_INCREF(Py_None);
return Py_None;
}
pevent->evsel = evsel;
- err = evsel__parse_sample(evsel, event, &pevent->sample);
-
- /* Consume the even only after we parsed it out. */
perf_mmap__consume(&md->core);
- if (err)
+ err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample);
+ if (err) {
+ Py_DECREF(pyevent);
return PyErr_Format(PyExc_OSError,
"perf: can't parse sample, err=%d", err);
+ }
+
return pyevent;
}
end:
@@ -1019,8 +1054,53 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
return Py_None;
}
+/*
+ * evlist.config(): run evlist__config() on the wrapped evlist with a fixed
+ * set of record options and the global callchain_param, then return None.
+ */
+static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist)
+{
+	struct record_opts opts = {
+		.sample_time	       = true,
+		.mmap_pages	       = UINT_MAX,
+		.user_freq	       = UINT_MAX,
+		.user_interval	       = ULLONG_MAX,
+		.freq		       = 4000,
+		.target		       = {
+			.uses_mmap	 = true,
+			.default_per_cpu = true,
+		},
+		.nr_threads_synthesize = 1,
+		.ctl_fd		       = -1,
+		.ctl_fd_ack	       = -1,
+		.no_buffering	       = true,
+		.no_inherit	       = true,
+	};
+
+	evlist__config(&pevlist->evlist, &opts, &callchain_param);
+
+	Py_RETURN_NONE;
+}
+
+/* evlist.disable(): call evlist__disable() on the wrapped evlist, return None. */
+static PyObject *pyrf_evlist__disable(struct pyrf_evlist *pevlist)
+{
+	struct evlist *evlist = &pevlist->evlist;
+
+	evlist__disable(evlist);
+
+	Py_RETURN_NONE;
+}
+
+/* evlist.enable(): call evlist__enable() on the wrapped evlist, return None. */
+static PyObject *pyrf_evlist__enable(struct pyrf_evlist *pevlist)
+{
+	struct evlist *evlist = &pevlist->evlist;
+
+	evlist__enable(evlist);
+
+	Py_RETURN_NONE;
+}
+
static PyMethodDef pyrf_evlist__methods[] = {
{
+ .ml_name = "all_cpus",
+ .ml_meth = (PyCFunction)pyrf_evlist__all_cpus,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.")
+ },
+ {
.ml_name = "mmap",
.ml_meth = (PyCFunction)pyrf_evlist__mmap,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
@@ -1056,6 +1136,24 @@ static PyMethodDef pyrf_evlist__methods[] = {
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = PyDoc_STR("reads an event.")
},
+ {
+ .ml_name = "config",
+ .ml_meth = (PyCFunction)pyrf_evlist__config,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Apply default record options to the evlist.")
+ },
+ {
+ .ml_name = "disable",
+ .ml_meth = (PyCFunction)pyrf_evlist__disable,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Disable the evsels in the evlist.")
+ },
+ {
+ .ml_name = "enable",
+ .ml_meth = (PyCFunction)pyrf_evlist__enable,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("Enable the evsels in the evlist.")
+ },
{ .ml_name = NULL, }
};
@@ -1254,6 +1352,8 @@ static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel)
evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx);
evsel__clone(&pevsel->evsel, evsel);
+ if (evsel__is_group_leader(evsel))
+ evsel__set_leader(&pevsel->evsel, &pevsel->evsel);
return (PyObject *)pevsel;
}
@@ -1281,12 +1381,18 @@ static PyObject *pyrf__parse_events(PyObject *self, PyObject *args)
struct evlist evlist = {};
struct parse_events_error err;
PyObject *result;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
- if (!PyArg_ParseTuple(args, "s", &input))
+ if (!PyArg_ParseTuple(args, "s|OO", &input, &pcpus, &pthreads))
return NULL;
+ threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL;
+ cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL;
+
parse_events_error__init(&err);
- evlist__init(&evlist, NULL, NULL);
+ evlist__init(&evlist, cpus, threads);
if (parse_events(&evlist, input, &err)) {
parse_events_error__print(&err, input);
PyErr_SetFromErrno(PyExc_OSError);
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
deleted file mode 100644
index d927a0d25052..000000000000
--- a/tools/perf/util/rb_resort.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PERF_RESORT_RB_H_
-#define _PERF_RESORT_RB_H_
-/*
- * Template for creating a class to resort an existing rb_tree according to
- * a new sort criteria, that must be present in the entries of the source
- * rb_tree.
- *
- * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Quick example, resorting threads by its shortname:
- *
- * First define the prefix (threads) to be used for the functions and data
- * structures created, and provide an expression for the sorting, then the
- * fields to be present in each of the entries in the new, sorted, rb_tree.
- *
- * The body of the init function should collect the fields, maybe
- * pre-calculating them from multiple entries in the original 'entry' from
- * the rb_tree used as a source for the entries to be sorted:
-
-DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
- b->thread->shortname) < 0,
- struct thread *thread;
-)
-{
- entry->thread = rb_entry(nd, struct thread, rb_node);
-}
-
- * After this it is just a matter of instantiating it and iterating it,
- * for a few data structures with existing rb_trees, such as 'struct machine',
- * helpers are available to get the rb_root and the nr_entries:
-
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
-
- * This will instantiate the new rb_tree and a cursor for it, that can be used as:
-
- struct rb_node *nd;
-
- resort_rb__for_each_entry(nd, threads) {
- struct thread *t = threads_entry;
- printf("%s: %d\n", t->shortname, t->tid);
- }
-
- * Then delete it:
-
- resort_rb__delete(threads);
-
- * The name of the data structures and functions will have a _sorted suffix
- * right before the method names, i.e. will look like:
- *
- * struct threads_sorted_entry {}
- * threads_sorted__insert()
- */
-
-#define DEFINE_RESORT_RB(__name, __comp, ...) \
-struct __name##_sorted_entry { \
- struct rb_node rb_node; \
- __VA_ARGS__ \
-}; \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry); \
- \
-static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
-{ \
- struct __name##_sorted_entry *a, *b; \
- a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
- b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
- return __comp; \
-} \
- \
-struct __name##_sorted { \
- struct rb_root entries; \
- struct __name##_sorted_entry nd[0]; \
-}; \
- \
-static void __name##_sorted__insert(struct __name##_sorted *sorted, \
- struct rb_node *sorted_nd) \
-{ \
- struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
- while (*p != NULL) { \
- parent = *p; \
- if (__name##_sorted__cmp(sorted_nd, parent)) \
- p = &(*p)->rb_left; \
- else \
- p = &(*p)->rb_right; \
- } \
- rb_link_node(sorted_nd, parent, p); \
- rb_insert_color(sorted_nd, &sorted->entries); \
-} \
- \
-static void __name##_sorted__sort(struct __name##_sorted *sorted, \
- struct rb_root *entries) \
-{ \
- struct rb_node *nd; \
- unsigned int i = 0; \
- for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
- struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
- __name##_sorted__init_entry(nd, snd); \
- __name##_sorted__insert(sorted, &snd->rb_node); \
- } \
-} \
- \
-static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
- int nr_entries) \
-{ \
- struct __name##_sorted *sorted; \
- sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
- if (sorted) { \
- sorted->entries = RB_ROOT; \
- __name##_sorted__sort(sorted, entries); \
- } \
- return sorted; \
-} \
- \
-static void __name##_sorted__delete(struct __name##_sorted *sorted) \
-{ \
- free(sorted); \
-} \
- \
-static void __name##_sorted__init_entry(struct rb_node *nd, \
- struct __name##_sorted_entry *entry)
-
-#define DECLARE_RESORT_RB(__name) \
-struct __name##_sorted_entry *__name##_entry; \
-struct __name##_sorted *__name = __name##_sorted__new
-
-#define resort_rb__for_each_entry(__nd, __name) \
- for (__nd = rb_first(&__name->entries); \
- __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
- rb_node), __nd; \
- __nd = rb_next(__nd))
-
-#define resort_rb__delete(__name) \
- __name##_sorted__delete(__name), __name = NULL
-
-/*
- * Helpers for other classes that contains both an rbtree and the
- * number of entries in it:
- */
-
-/* For 'struct intlist' */
-#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
- DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
- __ilist->rblist.nr_entries)
-
-#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 30638653ad2d..0ce52f0280b8 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -513,6 +513,7 @@ static bool s390_cpumsf_make_event(size_t pos,
.period = 1
};
union perf_event event;
+ int ret;
memset(&event, 0, sizeof(event));
if (basic->CL == 1) /* Native LPAR mode */
@@ -536,8 +537,9 @@ static bool s390_cpumsf_make_event(size_t pos,
pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
__func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
sample.tid, sample.cpumode, sample.cpu);
- if (perf_session__deliver_synth_event(sfq->sf->session, &event,
- &sample)) {
+ ret = perf_session__deliver_synth_event(sfq->sf->session, &event, &sample);
+ perf_sample__exit(&sample);
+ if (ret) {
pr_err("s390 Auxiliary Trace: failed to deliver event\n");
return false;
}
diff --git a/tools/perf/util/sample.c b/tools/perf/util/sample.c
new file mode 100644
index 000000000000..605fee971f55
--- /dev/null
+++ b/tools/perf/util/sample.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "sample.h"
+#include "debug.h"
+#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Prepare @sample for use.
+ *
+ * With @all set the whole structure is zeroed. Otherwise only the user_regs
+ * and intr_regs pointers are cleared and every other field is left as is.
+ */
+void perf_sample__init(struct perf_sample *sample, bool all)
+{
+	if (!all) {
+		sample->user_regs = NULL;
+		sample->intr_regs = NULL;
+		return;
+	}
+
+	memset(sample, 0, sizeof(*sample));
+}
+
+/*
+ * Release the register dumps owned by @sample.
+ *
+ * The pointers are reset to NULL after being freed, so calling this twice on
+ * the same sample, or calling perf_sample__user_regs() or
+ * perf_sample__intr_regs() afterwards (both only test the pointer for NULL),
+ * never touches freed memory.
+ */
+void perf_sample__exit(struct perf_sample *sample)
+{
+	zfree(&sample->user_regs);
+	zfree(&sample->intr_regs);
+}
+
+/*
+ * Return the user register dump of @sample, allocating it with zalloc() on
+ * first use. If the allocation fails an error is logged and NULL is
+ * returned, so callers must check the result.
+ */
+struct regs_dump *perf_sample__user_regs(struct perf_sample *sample)
+{
+	if (!sample->user_regs) {
+		sample->user_regs = zalloc(sizeof(*sample->user_regs));
+		if (!sample->user_regs)
+			pr_err("Failure to allocate sample user_regs\n");
+	}
+	return sample->user_regs;
+}
+
+
+/*
+ * Return the interrupt register dump of @sample, allocating it with zalloc()
+ * on first use. If the allocation fails an error is logged and NULL is
+ * returned, so callers must check the result.
+ */
+struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample)
+{
+	if (!sample->intr_regs) {
+		sample->intr_regs = zalloc(sizeof(*sample->intr_regs));
+		if (!sample->intr_regs)
+			pr_err("Failure to allocate sample intr_regs\n");
+	}
+	return sample->intr_regs;
+}
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 70b2c3135555..0e96240052e9 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -67,7 +67,7 @@ struct aux_sample {
};
struct simd_flags {
- u64 arch:1, /* architecture (isa) */
+ u8 arch:1, /* architecture (isa) */
pred:2; /* predication */
};
@@ -114,14 +114,19 @@ struct perf_sample {
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
u64 *branch_stack_cntr;
- struct regs_dump user_regs;
- struct regs_dump intr_regs;
+ struct regs_dump *user_regs;
+ struct regs_dump *intr_regs;
struct stack_dump user_stack;
struct sample_read read;
struct aux_sample aux_sample;
struct simd_flags simd_flags;
};
+void perf_sample__init(struct perf_sample *sample, bool all);
+void perf_sample__exit(struct perf_sample *sample);
+struct regs_dump *perf_sample__user_regs(struct perf_sample *sample);
+struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample);
+
/*
* raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
* 8-byte alignment.
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index b1b5e94537e4..520729e78965 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -745,19 +745,30 @@ static int set_regs_in_dict(PyObject *dict,
const char *arch = perf_env__arch(evsel__env(evsel));
int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1;
- char *bf = malloc(size);
- if (!bf)
- return -1;
+ char *bf = NULL;
- regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
+ if (sample->intr_regs) {
+ bf = malloc(size);
+ if (!bf)
+ return -1;
- pydict_set_item_string_decref(dict, "iregs",
- _PyUnicode_FromString(bf));
+ regs_map(sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
- regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size);
+ pydict_set_item_string_decref(dict, "iregs",
+ _PyUnicode_FromString(bf));
+ }
- pydict_set_item_string_decref(dict, "uregs",
- _PyUnicode_FromString(bf));
+ if (sample->user_regs) {
+ if (!bf) {
+ bf = malloc(size);
+ if (!bf)
+ return -1;
+ }
+ regs_map(sample->user_regs, attr->sample_regs_user, arch, bf, size);
+
+ pydict_set_item_string_decref(dict, "uregs",
+ _PyUnicode_FromString(bf));
+ }
free(bf);
return 0;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c06e3020a976..60fb9997ea0d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -950,7 +950,12 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a
static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
- struct regs_dump *user_regs = &sample->user_regs;
+ struct regs_dump *user_regs;
+
+ if (!sample->user_regs)
+ return;
+
+ user_regs = perf_sample__user_regs(sample);
if (user_regs->regs)
regs__printf("user", user_regs, arch);
@@ -958,7 +963,12 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch)
static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
- struct regs_dump *intr_regs = &sample->intr_regs;
+ struct regs_dump *intr_regs;
+
+ if (!sample->intr_regs)
+ return;
+
+ intr_regs = perf_sample__intr_regs(sample);
if (intr_regs->regs)
regs__printf("intr", intr_regs, arch);
@@ -1351,25 +1361,30 @@ static int perf_session__deliver_event(struct perf_session *session,
const char *file_path)
{
struct perf_sample sample;
- int ret = evlist__parse_sample(session->evlist, event, &sample);
+ int ret;
+ perf_sample__init(&sample, /*all=*/false);
+ ret = evlist__parse_sample(session->evlist, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
+ goto out;
}
ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
- return ret;
- if (ret > 0)
- return 0;
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
ret = machines__deliver_event(&session->machines, session->evlist,
event, &sample, tool, file_offset, file_path);
if (dump_trace && sample.aux_sample.size)
auxtrace__dump_auxtrace_sample(session, &sample);
-
+out:
+ perf_sample__exit(&sample);
return ret;
}
@@ -1380,10 +1395,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
{
struct ordered_events *oe = &session->ordered_events;
const struct perf_tool *tool = session->tool;
- struct perf_sample sample = { .time = 0, };
+ struct perf_sample sample;
int fd = perf_data__fd(session->data);
int err;
+ perf_sample__init(&sample, /*all=*/true);
if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool))
dump_event(session->evlist, event, file_offset, &sample, file_path);
@@ -1395,15 +1411,17 @@ static s64 perf_session__process_user_event(struct perf_session *session,
perf_session__set_id_hdr_size(session);
perf_session__set_comm_exec(session);
}
- return err;
+ break;
case PERF_RECORD_EVENT_UPDATE:
- return tool->event_update(tool, event, &session->evlist);
+ err = tool->event_update(tool, event, &session->evlist);
+ break;
case PERF_RECORD_HEADER_EVENT_TYPE:
/*
* Deprecated, but we need to handle it for sake
* of old data files create in pipe mode.
*/
- return 0;
+ err = 0;
+ break;
case PERF_RECORD_HEADER_TRACING_DATA:
/*
* Setup for reading amidst mmap, but only when we
@@ -1412,15 +1430,20 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset, SEEK_SET);
- return tool->tracing_data(session, event);
+ err = tool->tracing_data(session, event);
+ break;
case PERF_RECORD_HEADER_BUILD_ID:
- return tool->build_id(session, event);
+ err = tool->build_id(session, event);
+ break;
case PERF_RECORD_FINISHED_ROUND:
- return tool->finished_round(tool, event, oe);
+ err = tool->finished_round(tool, event, oe);
+ break;
case PERF_RECORD_ID_INDEX:
- return tool->id_index(session, event);
+ err = tool->id_index(session, event);
+ break;
case PERF_RECORD_AUXTRACE_INFO:
- return tool->auxtrace_info(session, event);
+ err = tool->auxtrace_info(session, event);
+ break;
case PERF_RECORD_AUXTRACE:
/*
* Setup for reading amidst mmap, but only when we
@@ -1429,35 +1452,48 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset + event->header.size, SEEK_SET);
- return tool->auxtrace(session, event);
+ err = tool->auxtrace(session, event);
+ break;
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
- return tool->auxtrace_error(session, event);
+ err = tool->auxtrace_error(session, event);
+ break;
case PERF_RECORD_THREAD_MAP:
- return tool->thread_map(session, event);
+ err = tool->thread_map(session, event);
+ break;
case PERF_RECORD_CPU_MAP:
- return tool->cpu_map(session, event);
+ err = tool->cpu_map(session, event);
+ break;
case PERF_RECORD_STAT_CONFIG:
- return tool->stat_config(session, event);
+ err = tool->stat_config(session, event);
+ break;
case PERF_RECORD_STAT:
- return tool->stat(session, event);
+ err = tool->stat(session, event);
+ break;
case PERF_RECORD_STAT_ROUND:
- return tool->stat_round(session, event);
+ err = tool->stat_round(session, event);
+ break;
case PERF_RECORD_TIME_CONV:
session->time_conv = event->time_conv;
- return tool->time_conv(session, event);
+ err = tool->time_conv(session, event);
+ break;
case PERF_RECORD_HEADER_FEATURE:
- return tool->feature(session, event);
+ err = tool->feature(session, event);
+ break;
case PERF_RECORD_COMPRESSED:
err = tool->compressed(session, event, file_offset, file_path);
if (err)
dump_event(session->evlist, event, file_offset, &sample, file_path);
- return err;
+ break;
case PERF_RECORD_FINISHED_INIT:
- return tool->finished_init(session, event);
+ err = tool->finished_init(session, event);
+ break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ break;
}
+ perf_sample__exit(&sample);
+ return err;
}
int perf_session__deliver_synth_event(struct perf_session *session,
@@ -2403,6 +2439,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
return false;
}
+/*
+ * Return true if at least one evsel in the session's evlist has the
+ * context_switch bit set in its attr, false otherwise.
+ */
+bool perf_session__has_switch_events(struct perf_session *session)
+{
+	struct evsel *pos;
+	bool found = false;
+
+	evlist__for_each_entry(session->evlist, pos) {
+		if (pos->core.attr.context_switch) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
char *bracket;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index bcf1bcf06959..db1c120a9e67 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -141,6 +141,7 @@ int perf_session__resolve_callchain(struct perf_session *session,
struct symbol **parent);
bool perf_session__has_traces(struct perf_session *session, const char *msg);
+bool perf_session__has_switch_events(struct perf_session *session);
void perf_event__attr_swap(struct perf_event_attr *attr);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 649550e9b7aa..dd289d15acfd 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -3,6 +3,7 @@ from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
+assert cc, "Environment variable CC not set"
# Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..."
cc_tokens = cc.split()
@@ -12,8 +13,13 @@ if len(cc_tokens) > 1:
else:
cc_options = ""
+# Ignore that the optional stderr could be None; it is set to PIPE, so it cannot be.
+# mypy: disable-error-code="union-attr"
cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
-src_feature_tests = getenv('srctree') + '/tools/build/feature'
+
+srctree = getenv('srctree')
+assert srctree, "Environment variable srctree, for the Linux sources, not set"
+src_feature_tests = f'{srctree}/tools/build/feature'
def clang_has_option(option):
cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
@@ -71,7 +77,7 @@ else:
# The python headers have mixed code with declarations (decls after asserts, for instance)
cflags += [ "-Wno-declaration-after-statement" ]
-src_perf = getenv('srctree') + '/tools/perf'
+src_perf = f'{srctree}/tools/perf'
build_lib = getenv('PYTHON_EXTBUILD_LIB')
build_tmp = getenv('PYTHON_EXTBUILD_TMP')
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 3dd33721823f..c51049087e4e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -892,6 +892,38 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
+/* --sort parallelism */
+
+/* Compare two entries by parallelism; the result is right minus left. */
+static int64_t
+sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->parallelism - left->parallelism;
+}
+
+/*
+ * Filter callback: for HIST_FILTER__PARALLELISM, @arg is a bitmap and the
+ * result is the bit indexed by the entry's parallelism; any other filter
+ * type yields -1.
+ */
+static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const unsigned long *bitmap = arg;
+
+	return type == HIST_FILTER__PARALLELISM ?
+		test_bit(he->parallelism, bitmap) : -1;
+}
+
+/* Print the entry's parallelism as a decimal padded to @width columns. */
+static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	int printed = repsep_snprintf(bf, size, "%*d", width, he->parallelism);
+
+	return printed;
+}
+
+struct sort_entry sort_parallelism = {
+	.se_header	= "Parallelism",
+	.se_cmp		= sort__parallelism_cmp,
+	.se_filter	= hist_entry__parallelism_filter,
+	.se_snprintf	= hist_entry__parallelism_snprintf,
+	.se_width_idx	= HISTC_PARALLELISM,
+};
+
/* --sort cgroup_id */
static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
@@ -2371,44 +2403,19 @@ sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right)
return left->mem_type_off - right->mem_type_off;
}
-static void fill_member_name(char *buf, size_t sz, struct annotated_member *m,
- int offset, bool first)
-{
- struct annotated_member *child;
-
- if (list_empty(&m->children))
- return;
-
- list_for_each_entry(child, &m->children, node) {
- if (child->offset <= offset && offset < child->offset + child->size) {
- int len = 0;
-
- /* It can have anonymous struct/union members */
- if (child->var_name) {
- len = scnprintf(buf, sz, "%s%s",
- first ? "" : ".", child->var_name);
- first = false;
- }
-
- fill_member_name(buf + len, sz - len, child, offset, first);
- return;
- }
- }
-}
-
static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
char buf[4096];
- buf[0] = '\0';
- if (list_empty(&he_type->self.children))
- snprintf(buf, sizeof(buf), "no field");
- else
- fill_member_name(buf, sizeof(buf), &he_type->self,
- he->mem_type_off, true);
- buf[4095] = '\0';
+ if (he_type == &unknown_type || he_type == &stackop_type ||
+ he_type == &canary_type)
+ return repsep_snprintf(bf, size, "%s", he_type->self.type_name);
+
+ if (!annotated_data_type__get_member_name(he_type, buf, sizeof(buf),
+ he->mem_type_off))
+ scnprintf(buf, sizeof(buf), "no field");
return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name,
he->mem_type_off, buf);
@@ -2534,6 +2541,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
+ DIM(SORT_PARALLELISM, "parallelism", sort_parallelism),
};
#undef DIM
@@ -2589,17 +2597,20 @@ struct hpp_dimension {
const char *name;
struct perf_hpp_fmt *fmt;
int taken;
+ int was_taken;
};
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD, "overhead"),
+ DIM(PERF_HPP__LATENCY, "latency"),
DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+ DIM(PERF_HPP__LATENCY_ACC, "latency_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
DIM(PERF_HPP__WEIGHT1, "weight1"),
@@ -2735,6 +2746,7 @@ MK_SORT_ENTRY_CHK(thread)
MK_SORT_ENTRY_CHK(comm)
MK_SORT_ENTRY_CHK(dso)
MK_SORT_ENTRY_CHK(sym)
+MK_SORT_ENTRY_CHK(parallelism)
static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
@@ -3477,6 +3489,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
return -1;
hd->taken = 1;
+ hd->was_taken = 1;
perf_hpp_list__register_sort_field(list, fmt);
return 0;
}
@@ -3511,10 +3524,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
return 0;
}
-int hpp_dimension__add_output(unsigned col)
+int hpp_dimension__add_output(unsigned col, bool implicit)
{
+ struct hpp_dimension *hd;
+
BUG_ON(col >= PERF_HPP__MAX_INDEX);
- return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+ hd = &hpp_sort_dimensions[col];
+ if (implicit && !hd->was_taken)
+ return 0;
+ return __hpp_dimension__add_output(&perf_hpp_list, hd);
}
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
@@ -3639,6 +3657,34 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
return -ESRCH;
}
+/*
+ * This should match with sort_dimension__add() above.
+ *
+ * Return true only if @key resolves to an entry of hpp_sort_dimensions: an
+ * unsupported arch-specific key, or a key that prefix-matches a common sort
+ * dimension, is rejected before the hpp table is consulted.
+ */
+static bool is_hpp_sort_key(const char *key)
+{
+	const size_t key_len = strlen(key);
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) {
+		if (strcmp(arch_specific_sort_keys[i], key))
+			continue;
+		if (!arch_support_sort_key(key))
+			return false;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+		const char *name = common_sort_dimensions[i].name;
+
+		if (name && strncasecmp(key, name, key_len) == 0)
+			return false;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+		const char *name = hpp_sort_dimensions[i].name;
+
+		if (strncasecmp(key, name, key_len) == 0)
+			return true;
+	}
+
+	return false;
+}
+
static int setup_sort_list(struct perf_hpp_list *list, char *str,
struct evlist *evlist)
{
@@ -3646,7 +3692,9 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
int ret = 0;
int level = 0;
int next_level = 1;
+ int prev_level = 0;
bool in_group = false;
+ bool prev_was_hpp = false;
do {
tok = str;
@@ -3667,6 +3715,19 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
}
if (*tok) {
+ if (is_hpp_sort_key(tok)) {
+ /* keep output (hpp) sort keys in the same level */
+ if (prev_was_hpp) {
+ bool next_same = (level == next_level);
+
+ level = prev_level;
+ next_level = next_same ? level : level+1;
+ }
+ prev_was_hpp = true;
+ } else {
+ prev_was_hpp = false;
+ }
+
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
@@ -3678,6 +3739,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
ui__error("Unknown --sort key: `%s'", tok);
break;
}
+ prev_level = level;
}
level = next_level;
@@ -3773,10 +3835,24 @@ static char *setup_overhead(char *keys)
if (sort__mode == SORT_MODE__DIFF)
return keys;
- keys = prefix_if_not_in("overhead", keys);
-
- if (symbol_conf.cumulate_callchain)
- keys = prefix_if_not_in("overhead_children", keys);
+ if (symbol_conf.prefer_latency) {
+ keys = prefix_if_not_in("overhead", keys);
+ keys = prefix_if_not_in("latency", keys);
+ if (symbol_conf.cumulate_callchain) {
+ keys = prefix_if_not_in("overhead_children", keys);
+ keys = prefix_if_not_in("latency_children", keys);
+ }
+ } else if (!keys || (!strstr(keys, "overhead") &&
+ !strstr(keys, "latency"))) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency", keys);
+ keys = prefix_if_not_in("overhead", keys);
+ if (symbol_conf.cumulate_callchain) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency_children", keys);
+ keys = prefix_if_not_in("overhead_children", keys);
+ }
+ }
return keys;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a8572574e168..180d36a2bea3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -72,6 +72,7 @@ enum sort_type {
SORT_ANNOTATE_DATA_TYPE_OFFSET,
SORT_SYM_OFFSET,
SORT_ANNOTATE_DATA_TYPE_CACHELINE,
+ SORT_PARALLELISM,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -140,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
bool is_strict_order(const char *order);
-int hpp_dimension__add_output(unsigned col);
+int hpp_dimension__add_output(unsigned col, bool implicit);
void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index ba79f73e1cf5..e852ac0d9847 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -929,12 +929,16 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
}
}
-static void uniquify_event_name(struct evsel *counter)
+static void evsel__uniquify_counter(struct evsel *counter)
{
const char *name, *pmu_name;
char *new_name, *config;
int ret;
+ /* No uniquification necessary. */
+ if (!counter->needs_uniquify)
+ return;
+
/* The evsel was already uniquified. */
if (counter->uniquified_name)
return;
@@ -942,19 +946,6 @@ static void uniquify_event_name(struct evsel *counter)
/* Avoid checking to uniquify twice. */
counter->uniquified_name = true;
- /* The evsel has a "name=" config term or is from libpfm. */
- if (counter->use_config_name || counter->is_libpfm_event)
- return;
-
- /* Legacy no PMU event, don't uniquify. */
- if (!counter->pmu ||
- (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW))
- return;
-
- /* A sysfs or json event replacing a legacy event, don't uniquify. */
- if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX)
- return;
-
name = evsel__name(counter);
pmu_name = counter->pmu->name;
/* Already prefixed by the PMU name. */
@@ -993,17 +984,6 @@ static void uniquify_event_name(struct evsel *counter)
}
}
-static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
-{
- return evsel__is_hybrid(evsel) && !config->hybrid_merge;
-}
-
-static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
-{
- if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config))
- uniquify_event_name(counter);
-}
-
/**
* should_skip_zero_count() - Check if the event should print 0 values.
* @config: The perf stat configuration (including aggregation mode).
@@ -1089,7 +1069,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
if (counter->merged_stat)
return;
- uniquify_counter(config, counter);
+ evsel__uniquify_counter(counter);
val = aggr->counts.val;
ena = aggr->counts.ena;
@@ -1670,7 +1650,8 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
print_metric_end(config, os);
}
-static void disable_uniquify(struct evlist *evlist)
+/* Should uniquify be disabled for the evlist? */
+static bool evlist__disable_uniquify(const struct evlist *evlist)
{
struct evsel *counter;
struct perf_pmu *last_pmu = NULL;
@@ -1679,20 +1660,84 @@ static void disable_uniquify(struct evlist *evlist)
evlist__for_each_entry(evlist, counter) {
/* If PMUs vary then uniquify can be useful. */
if (!first && counter->pmu != last_pmu)
- return;
+ return false;
first = false;
if (counter->pmu) {
/* Allow uniquify for uncore PMUs. */
if (!counter->pmu->is_core)
- return;
+ return false;
/* Keep hybrid event names uniquified for clarity. */
if (perf_pmus__num_core_pmus() > 1)
- return;
+ return false;
+ }
+ }
+ return true;
+}
+
+/*
+ * Decide whether @counter needs its name uniquified for display and, if so,
+ * set counter->needs_uniquify. The flag is only ever set here, never cleared.
+ *
+ * The checks run in priority order and the first one that applies wins:
+ *  - merged, explicitly named ("name=") and libpfm events are left alone;
+ *  - hybrid events are uniquified unless config->hybrid_merge is set;
+ *  - legacy events, and core PMU events standing in for a legacy event, are
+ *    left alone;
+ *  - anything else is uniquified when there is no aggregation, or when
+ *    another non-merged event in the same evlist carries the same name.
+ */
+static void evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config)
+{
+ struct evsel *evsel;
+
+ if (counter->merged_stat) {
+ /* Counter won't be shown. */
+ return;
+ }
+
+ if (counter->use_config_name || counter->is_libpfm_event) {
+ /* Original name will be used. */
+ return;
+ }
+
+ if (!config->hybrid_merge && evsel__is_hybrid(counter)) {
+ /* Unique hybrid counters necessary. */
+ counter->needs_uniquify = true;
+ return;
+ }
+
+ if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) {
+ /* Legacy event, don't uniquify. */
+ return;
+ }
+
+ if (counter->pmu && counter->pmu->is_core &&
+ counter->alternate_hw_config != PERF_COUNT_HW_MAX) {
+ /* A sysfs or json event replacing a legacy event, don't uniquify. */
+ return;
+ }
+
+ if (config->aggr_mode == AGGR_NONE) {
+ /* Always unique with no aggregation. */
+ counter->needs_uniquify = true;
+ return;
+ }
+
+ /*
+ * Do other non-merged events in the evlist have the same name? If so
+ * uniquify is necessary.
+ */
+ evlist__for_each_entry(counter->evlist, evsel) {
+ if (evsel == counter || evsel->merged_stat)
+ continue;
+
+ if (evsel__name_is(counter, evsel__name(evsel))) {
+ counter->needs_uniquify = true;
+ return;
}
}
- evlist__for_each_entry_continue(evlist, counter) {
- counter->uniquified_name = true;
+}
+
+/*
+ * Compute the uniquify decision for every counter in @evlist.
+ *
+ * When evlist__disable_uniquify() reports that uniquifying is pointless for
+ * this evlist, every counter is marked as already uniquified, which makes
+ * uniquify_event_name() return early for it. Otherwise each counter is
+ * examined individually by evsel__set_needs_uniquify().
+ */
+static void evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config)
+{
+ struct evsel *counter;
+
+ if (evlist__disable_uniquify(evlist)) {
+ evlist__for_each_entry(evlist, counter)
+ counter->uniquified_name = true;
+ return;
}
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_needs_uniquify(counter, config);
}
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
@@ -1706,7 +1751,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
.first = true,
};
- disable_uniquify(evlist);
+ evlist__set_needs_uniquify(evlist, config);
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index fa8b2a1048ff..d83bda5824d2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -151,6 +151,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
{
struct evsel *cur;
int evsel_ctx = evsel_context(evsel);
+ struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel);
evlist__for_each_entry(evsel->evlist, cur) {
struct perf_stat_aggr *aggr;
@@ -177,7 +178,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
* Except the SW CLOCK events,
* ignore if not the PMU we're looking for.
*/
- if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu))
+ if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur)))
continue;
aggr = &cur->stats->aggr[aggr_idx];
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 7c2ccdcc3fdb..1f7abd8754c7 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -535,7 +535,10 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
return 0;
}
-/* events should have the same name, scale, unit, cgroup but on different PMUs */
+/*
+ * Events should have the same name, scale, unit, cgroup but on different core
+ * PMUs or on different but matching uncore PMUs.
+ */
static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
{
if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
@@ -553,7 +556,13 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
return false;
- return evsel_a->pmu != evsel_b->pmu;
+ if (evsel_a->pmu == evsel_b->pmu || evsel_a->pmu == NULL || evsel_b->pmu == NULL)
+ return false;
+
+ if (evsel_a->pmu->is_core)
+ return evsel_b->pmu->is_core;
+
+ return perf_pmu__name_no_suffix_match(evsel_a->pmu, evsel_b->pmu->name);
}
static void evsel__merge_aliases(struct evsel *evsel)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 66fd1249660a..fbf6d0f73af9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -7,6 +7,7 @@
#include <unistd.h>
#include <inttypes.h>
+#include "compress.h"
#include "dso.h"
#include "map.h"
#include "maps.h"
@@ -1173,33 +1174,6 @@ out:
#endif
-static int dso__swap_init(struct dso *dso, unsigned char eidata)
-{
- static unsigned int const endian = 1;
-
- dso__set_needs_swap(dso, DSO_SWAP__NO);
-
- switch (eidata) {
- case ELFDATA2LSB:
- /* We are big endian, DSO is little endian. */
- if (*(unsigned char const *)&endian != 1)
- dso__set_needs_swap(dso, DSO_SWAP__YES);
- break;
-
- case ELFDATA2MSB:
- /* We are little endian, DSO is big endian. */
- if (*(unsigned char const *)&endian != 0)
- dso__set_needs_swap(dso, DSO_SWAP__YES);
- break;
-
- default:
- pr_err("unrecognized DSO data encoding %d\n", eidata);
- return -EINVAL;
- }
-
- return 0;
-}
-
bool symsrc__possibly_runtime(struct symsrc *ss)
{
return ss->dynsym || ss->opdsec;
@@ -1228,6 +1202,81 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
ehdr.e_type == ET_DYN;
}
+/*
+ * Open the ELF image embedded in the .gnu_debugdata section of @elf.
+ *
+ * The raw section bytes are fed through lzma_decompress_stream_to_file() into
+ * a temporary file that is unlinked right after creation, so it disappears
+ * once its descriptor is closed. The temporary file is then opened with
+ * elf_begin().
+ *
+ * @dso:    receives the failure reason through dso__load_errno().
+ * @elf:    the already opened ELF that may carry a .gnu_debugdata section.
+ * @name:   file name of @elf, used for debug messages only.
+ * @fd_ret: on success, set to the descriptor backing the returned Elf; the
+ *          caller takes over both the Elf and this descriptor.
+ *
+ * Returns the embedded Elf on success. On failure returns NULL, leaves
+ * *@fd_ret untouched and releases everything acquired here.
+ */
+static Elf *read_gnu_debugdata(struct dso *dso, Elf *elf, const char *name, int *fd_ret)
+{
+	Elf *elf_embedded;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Scn *scn;
+	Elf_Data *scn_data;
+	FILE *wrapped;
+	size_t shndx;
+	char temp_filename[] = "/tmp/perf.gnu_debugdata.elf.XXXXXX";
+	int ret, temp_fd, saved_errno;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+		*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+		return NULL;
+	}
+
+	scn = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debugdata", &shndx);
+	if (!scn) {
+		*dso__load_errno(dso) = -ENOENT;
+		return NULL;
+	}
+
+	if (shdr.sh_type == SHT_NOBITS) {
+		pr_debug("%s: .gnu_debugdata of ELF file %s has no data.\n", __func__, name);
+		*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+		return NULL;
+	}
+
+	scn_data = elf_rawdata(scn, NULL);
+	if (!scn_data) {
+		pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__,
+			 name, elf_errmsg(-1));
+		*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+		return NULL;
+	}
+
+	wrapped = fmemopen(scn_data->d_buf, scn_data->d_size, "r");
+	if (!wrapped) {
+		/* Snapshot errno: the logging call below may overwrite it. */
+		saved_errno = errno;
+		pr_debug("%s: fmemopen: %s\n", __func__, strerror(saved_errno));
+		*dso__load_errno(dso) = -saved_errno;
+		return NULL;
+	}
+
+	temp_fd = mkstemp(temp_filename);
+	if (temp_fd < 0) {
+		/* Snapshot errno before pr_debug()/fclose() can change it. */
+		saved_errno = errno;
+		pr_debug("%s: mkstemp: %s\n", __func__, strerror(saved_errno));
+		*dso__load_errno(dso) = -saved_errno;
+		fclose(wrapped);
+		return NULL;
+	}
+	/* Only the descriptor is needed; drop the name straight away. */
+	unlink(temp_filename);
+
+	ret = lzma_decompress_stream_to_file(wrapped, temp_fd);
+	/*
+	 * Snapshot errno before fclose() can change it.
+	 * NOTE(review): this presumes the decompressor leaves a meaningful
+	 * errno behind on failure - confirm against its implementation.
+	 */
+	saved_errno = errno;
+	fclose(wrapped);
+	if (ret < 0) {
+		*dso__load_errno(dso) = -saved_errno;
+		close(temp_fd);
+		return NULL;
+	}
+
+	elf_embedded = elf_begin(temp_fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (!elf_embedded) {
+		pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__,
+			 name, elf_errmsg(-1));
+		*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
+		close(temp_fd);
+		return NULL;
+	}
+	pr_debug("%s: using .gnu_debugdata of %s\n", __func__, name);
+	*fd_ret = temp_fd;
+	return elf_embedded;
+}
+
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
enum dso_binary_type type)
{
@@ -1256,6 +1305,19 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
goto out_close;
}
+ if (type == DSO_BINARY_TYPE__GNU_DEBUGDATA) {
+ int new_fd;
+ Elf *embedded = read_gnu_debugdata(dso, elf, name, &new_fd);
+
+ if (!embedded)
+ goto out_close;
+
+ elf_end(elf);
+ close(fd);
+ fd = new_fd;
+ elf = embedded;
+ }
+
if (gelf_getehdr(elf, &ehdr) == NULL) {
*dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF;
pr_debug("%s: cannot get elf header.\n", __func__);
@@ -1854,10 +1916,23 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
kmodule, 1);
if (err < 0)
return err;
- err += nr;
+ nr += err;
}
- return err;
+ /*
+ * The .gnu_debugdata is a special situation: it contains a symbol
+ * table, but the runtime file may also contain dynsym entries which are
+ * not present there. We need to load both.
+ */
+ if (syms_ss->type == DSO_BINARY_TYPE__GNU_DEBUGDATA && runtime_ss->dynsym) {
+ err = dso__load_sym_internal(dso, map, runtime_ss, runtime_ss,
+ kmodule, 1);
+ if (err < 0)
+ return err;
+ nr += err;
+ }
+
+ return nr;
}
static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 49b08adc6ee3..11540219481b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -18,6 +18,7 @@
#include "annotate.h"
#include "build-id.h"
#include "cap.h"
+#include "cpumap.h"
#include "dso.h"
#include "util.h" // lsdir()
#include "debug.h"
@@ -84,6 +85,7 @@ static enum dso_binary_type binary_type_symtab[] = {
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__GNU_DEBUGDATA,
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
DSO_BINARY_TYPE__GUEST_KMODULE,
DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
@@ -1716,6 +1718,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__GNU_DEBUGDATA:
return !kmod && dso__kernel(dso) == DSO_SPACE__USER;
case DSO_BINARY_TYPE__KALLSYMS:
@@ -2471,6 +2474,36 @@ int symbol__annotation_init(void)
return 0;
}
+/*
+ * Build symbol_conf.parallelism_filter from symbol_conf.parallelism_list_str.
+ *
+ * Nothing is done when no list was given. Otherwise the list is parsed with
+ * the CPU map parser, every bit of the filter is set, and the bit of each
+ * listed level is cleared again. Levels outside 1..MAX_NR_CPUS are rejected.
+ *
+ * Returns 0 on success (or when there is no list) and -1 on a parse error or
+ * an out-of-range level.
+ */
+static int setup_parallelism_bitmap(void)
+{
+	const char *list = symbol_conf.parallelism_list_str;
+	struct perf_cpu_map *levels;
+	struct perf_cpu level;
+	int idx, ret = -1;
+
+	if (!list)
+		return 0;
+
+	levels = perf_cpu_map__new(list);
+	if (!levels) {
+		pr_err("failed to parse parallelism filter list\n");
+		return -1;
+	}
+
+	bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1);
+	perf_cpu_map__for_each_cpu(level, idx, levels) {
+		bool in_range = level.cpu > 0 && level.cpu <= MAX_NR_CPUS;
+
+		if (!in_range) {
+			pr_err("Requested parallelism level %d is invalid.\n", level.cpu);
+			goto out_put_levels;
+		}
+		__clear_bit(level.cpu, symbol_conf.parallelism_filter);
+	}
+
+	ret = 0;
+out_put_levels:
+	perf_cpu_map__put(levels);
+	return ret;
+}
+
int symbol__init(struct perf_env *env)
{
const char *symfs;
@@ -2490,6 +2523,9 @@ int symbol__init(struct perf_env *env)
return -1;
}
+ if (setup_parallelism_bitmap())
+ return -1;
+
if (setup_list(&symbol_conf.dso_list,
symbol_conf.dso_list_str, "dso") < 0)
return -1;
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index a9c51acc722f..cd9aa82c7d5a 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -3,6 +3,8 @@
#define __PERF_SYMBOL_CONF 1
#include <stdbool.h>
+#include <linux/bitmap.h>
+#include "perf.h"
struct strlist;
struct intlist;
@@ -47,7 +49,9 @@ struct symbol_conf {
keep_exited_threads,
annotate_data_member,
annotate_data_sample,
- skip_empty;
+ skip_empty,
+ enable_latency,
+ prefer_latency;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
@@ -62,6 +66,7 @@ struct symbol_conf {
*pid_list_str,
*tid_list_str,
*sym_list_str,
+ *parallelism_list_str,
*col_width_list_str,
*bt_stop_list_str;
const char *addr2line_path;
@@ -82,6 +87,7 @@ struct symbol_conf {
int pad_output_len_dso;
int group_sort_idx;
int addr_range;
+ DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1);
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 6923b0d5efed..2fc4d0537840 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -38,6 +38,7 @@
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
#include <api/io.h>
+#include <api/io_dir.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -767,10 +768,10 @@ static int __event__synthesize_thread(union perf_event *comm_event,
bool needs_mmap, bool mmap_data)
{
char filename[PATH_MAX];
- struct dirent **dirent;
+ struct io_dir iod;
+ struct io_dirent64 *dent;
pid_t tgid, ppid;
int rc = 0;
- int i, n;
/* special case: only send one comm event using passed in pid */
if (!full) {
@@ -802,16 +803,19 @@ static int __event__synthesize_thread(union perf_event *comm_event,
snprintf(filename, sizeof(filename), "%s/proc/%d/task",
machine->root_dir, pid);
- n = scandir(filename, &dirent, filter_task, NULL);
- if (n < 0)
- return n;
+ io_dir__init(&iod, open(filename, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (iod.dirfd < 0)
+ return -1;
- for (i = 0; i < n; i++) {
+ while ((dent = io_dir__readdir(&iod)) != NULL) {
char *end;
pid_t _pid;
bool kernel_thread = false;
- _pid = strtol(dirent[i]->d_name, &end, 10);
+ if (!isdigit(dent->d_name[0]))
+ continue;
+
+ _pid = strtol(dent->d_name, &end, 10);
if (*end)
continue;
@@ -845,9 +849,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
}
}
- for (i = 0; i < n; i++)
- zfree(&dirent[i]);
- free(dirent);
+ close(iod.dirfd);
return rc;
}
@@ -1508,9 +1510,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
if (type & PERF_SAMPLE_REGS_USER) {
- if (sample->user_regs.abi) {
+ if (sample->user_regs && sample->user_regs->abi) {
result += sizeof(u64);
- sz = hweight64(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs->mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -1536,9 +1538,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
result += sizeof(u64);
if (type & PERF_SAMPLE_REGS_INTR) {
- if (sample->intr_regs.abi) {
+ if (sample->intr_regs && sample->intr_regs->abi) {
result += sizeof(u64);
- sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs->mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -1707,10 +1709,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_REGS_USER) {
- if (sample->user_regs.abi) {
- *array++ = sample->user_regs.abi;
- sz = hweight64(sample->user_regs.mask) * sizeof(u64);
- memcpy(array, sample->user_regs.regs, sz);
+ if (sample->user_regs && sample->user_regs->abi) {
+ *array++ = sample->user_regs->abi;
+ sz = hweight64(sample->user_regs->mask) * sizeof(u64);
+ memcpy(array, sample->user_regs->regs, sz);
array = (void *)array + sz;
} else {
*array++ = 0;
@@ -1743,10 +1745,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_REGS_INTR) {
- if (sample->intr_regs.abi) {
- *array++ = sample->intr_regs.abi;
- sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
- memcpy(array, sample->intr_regs.regs, sz);
+ if (sample->intr_regs && sample->intr_regs->abi) {
+ *array++ = sample->intr_regs->abi;
+ sz = hweight64(sample->intr_regs->mask) * sizeof(u64);
+ memcpy(array, sample->intr_regs->regs, sz);
array = (void *)array + sz;
} else {
*array++ = 0;
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 928aca4cd6e9..67a8ec10e9e4 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -7,119 +7,127 @@
#include "syscalltbl.h"
#include <stdlib.h>
+#include <asm/bitsperlong.h>
#include <linux/compiler.h>
+#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <string.h>
#include "string2.h"
-#include <syscall_table.h>
-const int syscalltbl_native_max_id = SYSCALLTBL_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl;
+#include "trace/beauty/generated/syscalltbl.c"
-struct syscall {
- int id;
- const char *name;
-};
-
-static int syscallcmpname(const void *vkey, const void *ventry)
-{
- const char *key = vkey;
- const struct syscall *entry = ventry;
-
- return strcmp(key, entry->name);
-}
-
-static int syscallcmp(const void *va, const void *vb)
+/*
+ * Look up the syscall table for the ELF machine type @e_machine.
+ *
+ * EM_SPARCV9 is looked up as EM_SPARC. The first entry of syscalltbls[] whose
+ * e_machine equals the requested one, or is EM_NONE, is returned and
+ * remembered in function-local statics so that a repeated lookup for the same
+ * machine skips the scan. Returns NULL when no entry qualifies.
+ *
+ * NOTE(review): the cache is plain static state with no locking visible here.
+ */
+static const struct syscalltbl *find_table(int e_machine)
{
- const struct syscall *a = va, *b = vb;
+ static const struct syscalltbl *last_table;
+ static int last_table_machine = EM_NONE;
- return strcmp(a->name, b->name);
-}
+ /* Tables only exist for EM_SPARC. */
+ if (e_machine == EM_SPARCV9)
+ e_machine = EM_SPARC;
-static int syscalltbl__init_native(struct syscalltbl *tbl)
-{
- int nr_entries = 0, i, j;
- struct syscall *entries;
+ if (last_table_machine == e_machine && last_table != NULL)
+ return last_table;
- for (i = 0; i <= syscalltbl_native_max_id; ++i)
- if (syscalltbl_native[i])
- ++nr_entries;
+ for (size_t i = 0; i < ARRAY_SIZE(syscalltbls); i++) {
+ const struct syscalltbl *entry = &syscalltbls[i];
- entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
- if (tbl->syscalls.entries == NULL)
- return -1;
+ if (entry->e_machine != e_machine && entry->e_machine != EM_NONE)
+ continue;
- for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
- if (syscalltbl_native[i]) {
- entries[j].name = syscalltbl_native[i];
- entries[j].id = i;
- ++j;
- }
+ last_table = entry;
+ last_table_machine = e_machine;
+ return entry;
}
-
- qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
- tbl->syscalls.nr_entries = nr_entries;
- tbl->syscalls.max_id = syscalltbl_native_max_id;
- return 0;
+ return NULL;
}
-struct syscalltbl *syscalltbl__new(void)
+/*
+ * Return the name of syscall number @id for machine type @e_machine.
+ *
+ * Returns NULL when no table exists for @e_machine or when @id is negative or
+ * not below the table's num_to_name_len. For EM_MIPS, ids above 1000 are
+ * reduced modulo 1000 before the lookup.
+ */
+const char *syscalltbl__name(int e_machine, int id)
{
- struct syscalltbl *tbl = malloc(sizeof(*tbl));
- if (tbl) {
- if (syscalltbl__init_native(tbl)) {
- free(tbl);
- return NULL;
- }
+ const struct syscalltbl *table = find_table(e_machine);
+
+ if (e_machine == EM_MIPS && id > 1000) {
+ /*
+ * MIPS may encode the N32/64/O32 type in the high part of
+ * syscall number. Mask this off if present. See the values of
+ * __NR_N32_Linux, __NR_64_Linux, __NR_O32_Linux and __NR_Linux.
+ */
+ id = id % 1000;
}
- return tbl;
+ if (table && id >= 0 && id < table->num_to_name_len)
+ return table->num_to_name[id];
+ return NULL;
}
-void syscalltbl__delete(struct syscalltbl *tbl)
+/*
+ * bsearch() key used by syscalltbl__id(): the name being searched for plus
+ * the table that maps a syscall number to its name.
+ */
+struct syscall_cmp_key {
+ const char *name;
+ const char *const *tbl;
+};
+
+/*
+ * bsearch() comparator. @vkey is a struct syscall_cmp_key and @ventry points
+ * at a uint16_t that is used as an index into key->tbl; the two names are
+ * ordered with strcmp().
+ */
+static int syscallcmpname(const void *vkey, const void *ventry)
{
- zfree(&tbl->syscalls.entries);
- free(tbl);
+ const struct syscall_cmp_key *key = vkey;
+ const uint16_t *entry = ventry;
+
+ return strcmp(key->name, key->tbl[*entry]);
}
-const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+/*
+ * Return the syscall number whose name is @name for machine type @e_machine.
+ *
+ * The name is located with bsearch() over the table's sorted_names array.
+ * Returns -1 when there is no table for @e_machine or the name is not found.
+ */
+int syscalltbl__id(int e_machine, const char *name)
{
- return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL;
+ const struct syscalltbl *table = find_table(e_machine);
+ struct syscall_cmp_key key;
+ const uint16_t *id;
+
+ if (!table)
+ return -1;
+
+ key.name = name;
+ key.tbl = table->num_to_name;
+ id = bsearch(&key, table->sorted_names, table->sorted_names_len,
+ sizeof(table->sorted_names[0]), syscallcmpname);
+
+ return id ? *id : -1;
}
-int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+/*
+ * Return the number of entries in the sorted_names array of the table for
+ * @e_machine, i.e. the exclusive upper bound for the idx argument of
+ * syscalltbl__id_at_idx(). Returns 0 when there is no table.
+ */
+int syscalltbl__num_idx(int e_machine)
{
- struct syscall *sc = bsearch(name, tbl->syscalls.entries,
- tbl->syscalls.nr_entries, sizeof(*sc),
- syscallcmpname);
+ const struct syscalltbl *table = find_table(e_machine);
+
+ if (!table)
+ return 0;
- return sc ? sc->id : -1;
+ return table->sorted_names_len;
}
-int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx)
+/*
+ * Return the syscall number stored at position @idx of the sorted_names array
+ * of the table for @e_machine, or -1 when there is no table.
+ *
+ * @idx must satisfy 0 <= idx < syscalltbl__num_idx(e_machine). This is only
+ * enforced by assert(), so with assertions compiled out an out-of-range @idx
+ * is not caught.
+ */
+int syscalltbl__id_at_idx(int e_machine, int idx)
{
- struct syscall *syscalls = tbl->syscalls.entries;
+ const struct syscalltbl *table = find_table(e_machine);
- return idx < tbl->syscalls.nr_entries ? syscalls[idx].id : -1;
+ if (!table)
+ return -1;
+
+ assert(idx >= 0 && idx < table->sorted_names_len);
+ return table->sorted_names[idx];
}
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+/*
+ * Continue a glob search over the syscall names of the table for @e_machine.
+ *
+ * Scanning starts at position *@idx + 1 of sorted_names. On the first name
+ * that matches @syscall_glob, *@idx is updated to that position and the
+ * syscall number is returned. Returns -1, leaving *@idx unchanged, when
+ * nothing further matches or there is no table.
+ */
+int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx)
{
- int i;
- struct syscall *syscalls = tbl->syscalls.entries;
+ const struct syscalltbl *table = find_table(e_machine);
+
+ for (int i = *idx + 1; table && i < table->sorted_names_len; ++i) {
+ const char *name = table->num_to_name[table->sorted_names[i]];
- for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
- if (strglobmatch(syscalls[i].name, syscall_glob)) {
+ if (strglobmatch(name, syscall_glob)) {
*idx = i;
- return syscalls[i].id;
+ return table->sorted_names[i];
}
}
return -1;
}
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+/*
+ * Start a glob search: reset *@idx to -1 and return the first match as
+ * reported by syscalltbl__strglobmatch_next() (-1 if there is none).
+ */
+int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx)
{
*idx = -1;
- return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+ return syscalltbl__strglobmatch_next(e_machine, syscall_glob, idx);
}
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 362411a6d849..2bb628eff367 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -2,22 +2,12 @@
#ifndef __PERF_SYSCALLTBL_H
#define __PERF_SYSCALLTBL_H
-struct syscalltbl {
- struct {
- int max_id;
- int nr_entries;
- void *entries;
- } syscalls;
-};
+const char *syscalltbl__name(int e_machine, int id);
+int syscalltbl__id(int e_machine, const char *name);
+int syscalltbl__num_idx(int e_machine);
+int syscalltbl__id_at_idx(int e_machine, int idx);
-struct syscalltbl *syscalltbl__new(void);
-void syscalltbl__delete(struct syscalltbl *tbl);
-
-const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
-int syscalltbl__id(struct syscalltbl *tbl, const char *name);
-int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx);
-
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx);
#endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 0ffdd52d86d7..89585f53c1d5 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
#include <errno.h>
+#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -16,6 +18,7 @@
#include "symbol.h"
#include "unwind.h"
#include "callchain.h"
+#include "dwarf-regs.h"
#include <api/fs/fs.h>
@@ -51,6 +54,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread__set_ppid(thread, -1);
thread__set_cpu(thread, -1);
thread__set_guest_cpu(thread, -1);
+ thread__set_e_machine(thread, EM_NONE);
thread__set_lbr_stitch_enable(thread, false);
INIT_LIST_HEAD(thread__namespaces_list(thread));
INIT_LIST_HEAD(thread__comm_list(thread));
@@ -423,6 +427,82 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
}
}
+/*
+ * Read the e_machine field straight out of the ELF header of /proc/<pid>/exe.
+ *
+ * The field sits at byte offset 18 in both the 32-bit and the 64-bit ELF
+ * header (checked at compile time), so the ELF class need not be inspected.
+ * Returns EM_NONE when the file cannot be opened or the read comes up short.
+ */
+static uint16_t read_proc_e_machine_for_pid(pid_t pid)
+{
+	char exe_path[6 /* "/proc/" */ + 11 /* max length of pid */ + 5 /* "/exe\0" */];
+	uint16_t machine_id = EM_NONE;
+	ssize_t nread;
+	int exe_fd;
+
+	_Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset");
+	_Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
+
+	snprintf(exe_path, sizeof(exe_path), "/proc/%d/exe", pid);
+	exe_fd = open(exe_path, O_RDONLY);
+	if (exe_fd < 0)
+		return EM_NONE;
+
+	nread = pread(exe_fd, &machine_id, sizeof(machine_id), 18);
+	close(exe_fd);
+	if (nread != sizeof(machine_id))
+		return EM_NONE;
+
+	return machine_id;
+}
+
+/*
+ * Per-map callback for thread__e_machine(): report the ELF machine type of
+ * the dso behind @map, or EM_NONE when the map has no dso. @machine is handed
+ * through to dso__e_machine() unchanged.
+ *
+ * EM_NONE has to be 0 because the caller takes the result of the map walk and
+ * compares it against EM_NONE to detect "nothing found".
+ */
+static int thread__e_machine_callback(struct map *map, void *machine)
+{
+	struct dso *map_dso;
+
+	_Static_assert(0 == EM_NONE, "Unexpected EM_NONE");
+
+	map_dso = map__dso(map);
+	return map_dso ? dso__e_machine(map_dso, machine) : EM_NONE;
+}
+
+/*
+ * Return the ELF machine type (EM_*) associated with @thread.
+ *
+ * A previously computed value cached in the thread is returned directly.
+ * Otherwise:
+ *  - a non-leader thread (pid != tid) takes the value of its process leader,
+ *    looked up (or created) in @machine, and caches it;
+ *  - the leader asks each of its maps' dsos in turn;
+ *  - if the maps yield nothing and the run is considered live, the ELF
+ *    header of /proc/<pid>/exe is read.
+ *
+ * A value other than EM_NONE is cached in the thread. When nothing could be
+ * determined EM_HOST is returned without being cached.
+ */
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine)
+{
+	pid_t tid, pid;
+	uint16_t e_machine = RC_CHK_ACCESS(thread)->e_machine;
+
+	if (e_machine != EM_NONE)
+		return e_machine;
+
+	tid = thread__tid(thread);
+	pid = thread__pid(thread);
+	if (pid != tid) {
+		struct thread *parent = machine__findnew_thread(machine, pid, pid);
+
+		if (parent) {
+			e_machine = thread__e_machine(parent, machine);
+			/*
+			 * machine__findnew_thread() hands back a reference;
+			 * release it or the leader thread is leaked.
+			 */
+			thread__put(parent);
+			thread__set_e_machine(thread, e_machine);
+			return e_machine;
+		}
+		/* Something went wrong, fallback. */
+	}
+	/* Reading on the PID thread. First try to find from the maps. */
+	e_machine = maps__for_each_map(thread__maps(thread),
+				       thread__e_machine_callback,
+				       machine);
+	if (e_machine == EM_NONE) {
+		/* Maps failed, perhaps we're live with map events disabled. */
+		bool is_live = machine->machines == NULL;
+
+		if (!is_live) {
+			/* Check if the session has a data file. */
+			struct perf_session *session = container_of(machine->machines,
+								    struct perf_session,
+								    machines);
+
+			is_live = !!session->data;
+		}
+		/* Read from /proc/pid/exe if live. */
+		if (is_live)
+			e_machine = read_proc_e_machine_for_pid(pid);
+	}
+	if (e_machine != EM_NONE)
+		thread__set_e_machine(thread, e_machine);
+	else
+		e_machine = EM_HOST;
+	return e_machine;
+}
+
struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
{
if (thread__pid(thread) == thread__tid(thread))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 6cbf6eb2812e..cd574a896418 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -60,7 +60,11 @@ DECLARE_RC_STRUCT(thread) {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
-
+ /**
+ * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not
+ * computed.
+ */
+ uint16_t e_machine;
/* LBR call stack stitch */
bool lbr_stitch_enable;
struct lbr_stitch *lbr_stitch;
@@ -302,6 +306,14 @@ static inline void thread__set_filter_entry_depth(struct thread *thread, int dep
RC_CHK_ACCESS(thread)->filter_entry_depth = depth;
}
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine);
+
+/* Store @e_machine (an ELF EM_* value) as the machine type of @thread. */
+static inline void thread__set_e_machine(struct thread *thread, uint16_t e_machine)
+{
+ RC_CHK_ACCESS(thread)->e_machine = e_machine;
+}
+
+
static inline bool thread__lbr_stitch_enable(const struct thread *thread)
{
return RC_CHK_ACCESS(thread)->lbr_stitch_enable;
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index 4fb097578479..97b327d1ce4a 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -62,7 +62,8 @@ int tool_pmu__num_skip_events(void)
const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
- if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX)
+ if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
+ !tool_pmu__skip_event(tool_pmu__event_names[ev]))
return tool_pmu__event_names[ev];
return NULL;
@@ -354,6 +355,7 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
if (online) {
*result = perf_cpu_map__nr(online);
+ perf_cpu_map__put(online);
return true;
}
return false;
@@ -489,17 +491,24 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
return 0;
}
-struct perf_pmu *perf_pmus__tool_pmu(void)
+/*
+ * Allocate and initialise the "tool" PMU.
+ *
+ * The structure is zero-allocated, given a strdup()ed name, the
+ * PERF_PMU_TYPE_TOOL type, empty alias/caps/format lists and the "common"
+ * core events table. Returns NULL when either allocation fails.
+ *
+ * NOTE(review): the failure path relies on zfree() resetting 'tool' to NULL
+ * before the shared return - confirm against the zfree() definition.
+ */
+struct perf_pmu *tool_pmu__new(void)
{
- static struct perf_pmu tool = {
- .name = "tool",
- .type = PERF_PMU_TYPE_TOOL,
- .aliases = LIST_HEAD_INIT(tool.aliases),
- .caps = LIST_HEAD_INIT(tool.caps),
- .format = LIST_HEAD_INIT(tool.format),
- };
- if (!tool.events_table)
- tool.events_table = find_core_events_table("common", "common");
-
- return &tool;
+ struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
+
+ if (!tool)
+ goto out;
+ tool->name = strdup("tool");
+ if (!tool->name) {
+ zfree(&tool);
+ goto out;
+ }
+
+ tool->type = PERF_PMU_TYPE_TOOL;
+ INIT_LIST_HEAD(&tool->aliases);
+ INIT_LIST_HEAD(&tool->caps);
+ INIT_LIST_HEAD(&tool->format);
+ tool->events_table = find_core_events_table("common", "common");
+
+out:
+ return tool;
}
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index a60184859080..c6ad1dd90a56 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -51,6 +51,6 @@ int evsel__tool_pmu_open(struct evsel *evsel,
int start_cpu_map_idx, int end_cpu_map_idx);
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread);
-struct perf_pmu *perf_pmus__tool_pmu(void);
+struct perf_pmu *tool_pmu__new(void);
#endif /* __TOOL_PMU_H */
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 4e81e02a4f18..72abb28b7b5a 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -309,53 +309,107 @@ static const struct {
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
- {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"},
{0, NULL}
};
-static const char *sample_flags_to_name(u32 flags)
+/*
+ * Branch event flag bits and the short names printed for them. The table is
+ * terminated by an entry with a NULL name.
+ */
+static const struct {
+ u32 flags;
+ const char *name;
+} branch_events[] = {
+ {PERF_IP_FLAG_BRANCH_MISS, "miss"},
+ {PERF_IP_FLAG_NOT_TAKEN, "not_taken"},
+ {0, NULL}
+};
+
+/*
+ * Render sample @flags as text into @str (a buffer of @size bytes).
+ *
+ * The output is built from up to four pieces, in this order:
+ *  - a "tr strt"/"tr end" prefix for trace begin/end;
+ *  - the branch type name found in sample_flags[];
+ *  - the branch events from branch_events[], written as "/ev1,ev2/";
+ *  - the additional state bits as "(xDt)", right aligned so that the whole
+ *    string is SAMPLE_FLAGS_STR_ALIGNED_SIZE wide when it fits, otherwise
+ *    appended after a single space.
+ *
+ * Returns the sum of the snprintf() results for the pieces written, or the
+ * negative value of a failing snprintf().
+ */
+static int sample_flags_to_name(u32 flags, char *str, size_t size)
{
int i;
+ const char *prefix;
+ int pos = 0, ret, ev_idx = 0;
+ u32 xf = flags & PERF_ADDITIONAL_STATE_MASK;
+ u32 types, events;
+ char xs[16] = { 0 };
+
+ /* Clear additional state bits */
+ flags &= ~PERF_ADDITIONAL_STATE_MASK;
+
+ if (flags & PERF_IP_FLAG_TRACE_BEGIN)
+ prefix = "tr strt ";
+ else if (flags & PERF_IP_FLAG_TRACE_END)
+ prefix = "tr end ";
+ else
+ prefix = "";
+
+ ret = snprintf(str + pos, size - pos, "%s", prefix);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+
+ flags &= ~(PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END);
+
+ types = flags & ~PERF_IP_FLAG_BRANCH_EVENT_MASK;
+ for (i = 0; sample_flags[i].name; i++) {
+ if (sample_flags[i].flags != types)
+ continue;
+
+ ret = snprintf(str + pos, size - pos, "%s", sample_flags[i].name);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ break;
+ }
- for (i = 0; sample_flags[i].name ; i++) {
- if (sample_flags[i].flags == flags)
- return sample_flags[i].name;
+ events = flags & PERF_IP_FLAG_BRANCH_EVENT_MASK;
+ for (i = 0; branch_events[i].name; i++) {
+ if (!(branch_events[i].flags & events))
+ continue;
+
+ ret = snprintf(str + pos, size - pos, !ev_idx ? "/%s" : ",%s",
+ branch_events[i].name);
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ ev_idx++;
}
- return NULL;
+ /* Add an end character '/' for events */
+ if (ev_idx) {
+ ret = snprintf(str + pos, size - pos, "/");
+ if (ret < 0)
+ return ret;
+ pos += ret;
+ }
+
+ if (!xf)
+ return pos;
+
+ /*
+ * The additional state bits were cleared from 'flags' above and only
+ * survive in 'xf', so they must be tested there.
+ */
+ snprintf(xs, sizeof(xs), "(%s%s%s)",
+ xf & PERF_IP_FLAG_IN_TX ? "x" : "",
+ xf & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
+ xf & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
+
+ /*
+ * Right align the string if its length is less than the limit. The
+ * field width is what remains after everything written so far (pos),
+ * not after the last piece only.
+ */
+ if ((pos + strlen(xs)) < SAMPLE_FLAGS_STR_ALIGNED_SIZE)
+ ret = snprintf(str + pos, size - pos, "%*s",
+ (int)(SAMPLE_FLAGS_STR_ALIGNED_SIZE - pos), xs);
+ else
+ ret = snprintf(str + pos, size - pos, " %s", xs);
+ if (ret < 0)
+ return ret;
+
+ return pos + ret;
}
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
{
- u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE |
- PERF_IP_FLAG_INTR_TOGGLE;
const char *chars = PERF_IP_FLAG_CHARS;
const size_t n = strlen(PERF_IP_FLAG_CHARS);
- const char *name = NULL;
size_t i, pos = 0;
- char xs[16] = {0};
-
- if (flags & xf)
- snprintf(xs, sizeof(xs), "(%s%s%s)",
- flags & PERF_IP_FLAG_IN_TX ? "x" : "",
- flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
- flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
-
- name = sample_flags_to_name(flags & ~xf);
- if (name)
- return snprintf(str, sz, "%-15s%6s", name, xs);
-
- if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
- name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN));
- if (name)
- return snprintf(str, sz, "tr strt %-7s%6s", name, xs);
- }
+ int ret;
- if (flags & PERF_IP_FLAG_TRACE_END) {
- name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END));
- if (name)
- return snprintf(str, sz, "tr end %-7s%6s", name, xs);
- }
+ ret = sample_flags_to_name(flags, str, sz);
+ if (ret > 0)
+ return ret;
for (i = 0; i < n; i++, flags >>= 1) {
if ((flags & 1) && pos < sz)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index ac9fde2f980c..71e680bc3d4b 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -145,6 +145,8 @@ int common_flags(struct scripting_context *context);
int common_lock_depth(struct scripting_context *context);
#define SAMPLE_FLAGS_BUF_SIZE 64
+#define SAMPLE_FLAGS_STR_ALIGNED_SIZE 21
+
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);
#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index 32c39cfe209b..4c6a86e1cb54 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit)
int unit_number__scnprintf(char *buf, size_t size, u64 n)
{
- char unit[4] = "BKMG";
+ char unit[] = "BKMG";
int i = 0;
while (((n / 1024) > 1) && (i < 3)) {
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index bde216e630d2..793d11832694 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -190,7 +190,10 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
int offset;
int ret;
- ret = perf_reg_value(&start, &ui->sample->user_regs,
+ if (!ui->sample->user_regs)
+ return false;
+
+ ret = perf_reg_value(&start, ui->sample->user_regs,
perf_arch_reg_sp(arch));
if (ret)
return false;
@@ -273,7 +276,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
Dwarf_Word ip;
int err = -EINVAL, i;
- if (!data->user_regs.regs)
+ if (!data->user_regs || !data->user_regs->regs)
return -EINVAL;
ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
@@ -286,7 +289,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
if (!ui->dwfl)
goto out;
- err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
+ err = perf_reg_value(&ip, data->user_regs, perf_arch_reg_ip(arch));
if (err)
goto out;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 16c2b03831f3..9fb2c1343c7f 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -330,8 +330,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
int ret, fd;
if (dso__data(dso)->eh_frame_hdr_offset == 0) {
- fd = dso__data_get_fd(dso, ui->machine);
- if (fd < 0)
+ if (!dso__data_get_fd(dso, ui->machine, &fd))
return -EINVAL;
/* Check the .eh_frame section for unwinding info */
@@ -372,8 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
* has to be pointed by symsrc_filename
*/
if (ofs == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd >= 0) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
ofs = elf_section_offset(fd, ".debug_frame");
dso__data_put_fd(dso);
}
@@ -485,14 +483,16 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
/* Check the .debug_frame section for unwinding info */
if (ret < 0 &&
!read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
- int fd = dso__data_get_fd(dso, ui->machine);
- int is_exec = elf_is_exec(fd, dso__name(dso));
+ int fd;
u64 start = map__start(map);
- unw_word_t base = is_exec ? 0 : start;
+ unw_word_t base = start;
const char *symfile;
- if (fd >= 0)
+ if (dso__data_get_fd(dso, ui->machine, &fd)) {
+ if (elf_is_exec(fd, dso__name(dso)))
+ base = 0;
dso__data_put_fd(dso);
+ }
symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
@@ -579,12 +579,12 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
int ret;
/* Don't support write, probably not needed. */
- if (__write || !stack || !ui->sample->user_regs.regs) {
+ if (__write || !stack || !ui->sample->user_regs || !ui->sample->user_regs->regs) {
*valp = 0;
return 0;
}
- ret = perf_reg_value(&start, &ui->sample->user_regs,
+ ret = perf_reg_value(&start, perf_sample__user_regs(ui->sample),
perf_arch_reg_sp(arch));
if (ret)
return ret;
@@ -628,7 +628,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
return 0;
}
- if (!ui->sample->user_regs.regs) {
+ if (!ui->sample->user_regs || !ui->sample->user_regs->regs) {
*valp = 0;
return 0;
}
@@ -637,7 +637,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
if (id < 0)
return -EINVAL;
- ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample), id);
if (ret) {
if (!ui->best_effort)
pr_err("unwind: can't read reg %d\n", regnum);
@@ -741,7 +741,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
unw_cursor_t c;
int ret, i = 0;
- ret = perf_reg_value(&val, &ui->sample->user_regs,
+ ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample),
perf_arch_reg_ip(arch));
if (ret)
return ret;
@@ -808,7 +808,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.best_effort = best_effort
};
- if (!data->user_regs.regs)
+ if (!data->user_regs || !data->user_regs->regs)
return -EINVAL;
if (max_stack <= 0)