diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-19 14:25:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-19 14:25:23 -0800 |
commit | 9d64bf433c53cab2f48a3fff7a1f2a696bc5229a (patch) | |
tree | 0535254c177c27ec34adbc153d494cea9a9625a0 | |
parent | 57f22c8dab6b266ae36b89b073a4a33dea71e762 (diff) | |
parent | d988c9f511af71a3445b6a4f3a2c67208ff8e480 (diff) | |
download | lwn-9d64bf433c53cab2f48a3fff7a1f2a696bc5229a.tar.gz lwn-9d64bf433c53cab2f48a3fff7a1f2a696bc5229a.zip |
Merge tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Arnaldo Carvalho de Melo:
"Add Namhyung Kim as tools/perf/ co-maintainer, we're taking turns
processing patches, switching roles from perf-tools to perf-tools-next
at each Linux release.
Data profiling:
- Associate samples that identify loads and stores with data
structures. This uses events available on Intel, AMD and others and
DWARF info:
# To get memory access samples in kernel for 1 second (on Intel)
$ perf mem record -a -K --ldlat=4 -- sleep 1
# Similar for the AMD (but it requires 6.3+ kernel for BPF filters)
$ perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000' -- sleep 1
Then, amongst several modes of post processing, one can do things like:
$ perf report -s type,typeoff --hierarchy --group --stdio
...
#
# Samples: 10K of events 'cpu/mem-loads,ldlat=4/P, cpu/mem-stores/P, dummy:u'
# Event count (approx.): 602758064
#
# Overhead Data Type / Data Type Offset
# ........................... ............................
#
26.09% 3.28% 0.00% long unsigned int
26.09% 3.28% 0.00% long unsigned int +0 (no field)
18.48% 0.73% 0.00% struct page
10.83% 0.02% 0.00% struct page +8 (lru.next)
3.90% 0.28% 0.00% struct page +0 (flags)
3.45% 0.06% 0.00% struct page +24 (mapping)
0.25% 0.28% 0.00% struct page +48 (_mapcount.counter)
0.02% 0.06% 0.00% struct page +32 (index)
0.02% 0.00% 0.00% struct page +52 (_refcount.counter)
0.02% 0.01% 0.00% struct page +56 (memcg_data)
0.00% 0.01% 0.00% struct page +16 (lru.prev)
15.37% 17.54% 0.00% (stack operation)
15.37% 17.54% 0.00% (stack operation) +0 (no field)
11.71% 50.27% 0.00% (unknown)
11.71% 50.27% 0.00% (unknown) +0 (no field)
$ perf annotate --data-type
...
Annotate type: 'struct cfs_rq' in [kernel.kallsyms] (13 samples):
============================================================================
samples offset size field
13 0 640 struct cfs_rq {
2 0 16 struct load_weight load {
2 0 8 unsigned long weight;
0 8 4 u32 inv_weight;
};
0 16 8 unsigned long runnable_weight;
0 24 4 unsigned int nr_running;
1 28 4 unsigned int h_nr_running;
...
$ perf annotate --data-type=page --group
Annotate type: 'struct page' in [kernel.kallsyms] (480 samples):
event[0] = cpu/mem-loads,ldlat=4/P
event[1] = cpu/mem-stores/P
event[2] = dummy:u
===================================================================================
samples offset size field
447 33 0 0 64 struct page {
108 8 0 0 8 long unsigned int flags;
319 13 0 8 40 union {
319 13 0 8 40 struct {
236 2 0 8 16 union {
236 2 0 8 16 struct list_head lru {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
236 2 0 8 16 struct {
236 1 0 8 8 void* __filler;
0 1 0 16 4 unsigned int mlock_count;
};
236 2 0 8 16 struct list_head buddy_list {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
236 2 0 8 16 struct list_head pcp_list {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
};
82 4 0 24 8 struct address_space* mapping;
1 7 0 32 8 union {
1 7 0 32 8 long unsigned int index;
1 7 0 32 8 long unsigned int share;
};
0 0 0 40 8 long unsigned int private;
};
This uses the existing annotate code, calling objdump to do the
disassembly, with improvements to avoid having this take too long,
but longer term a switch to a disassembler library, possibly
reusing code in the kernel will be pursued.
This is the initial implementation, please use it and report
impressions and bugs. Make sure the kernel-debuginfo packages match
the running kernel. The 'perf report' phase for non short perf.data
files may take a while.
There is a great article about it on LWN:
https://lwn.net/Articles/955709/ - "Data-type profiling for perf"
One last test I did while writing this text, on a AMD Ryzen 5950X,
using a distro kernel, while doing a simple 'find /' on an
otherwise idle system resulted in:
# uname -r
6.6.9-100.fc38.x86_64
# perf -vv | grep BPF_
bpf: [ on ] # HAVE_LIBBPF_SUPPORT
bpf_skeletons: [ on ] # HAVE_BPF_SKEL
# rpm -qa | grep kernel-debuginfo
kernel-debuginfo-common-x86_64-6.6.9-100.fc38.x86_64
kernel-debuginfo-6.6.9-100.fc38.x86_64
#
# perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000'
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.199 MB perf.data (2913 samples) ]
#
# ls -la perf.data
-rw-------. 1 root root 2346486 Jan 9 18:36 perf.data
# perf evlist
ibs_op//
dummy:u
# perf evlist -v
ibs_op//: type: 11, size: 136, config: 0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1
dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|ADDR|CPU|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
#
# perf report -s type,typeoff --hierarchy --group --stdio
# Total Lost Samples: 0
#
# Samples: 2K of events 'ibs_op//, dummy:u'
# Event count (approx.): 1904553038
#
# Overhead Data Type / Data Type Offset
# ................... ............................
#
73.70% 0.00% (unknown)
73.70% 0.00% (unknown) +0 (no field)
3.01% 0.00% long unsigned int
3.00% 0.00% long unsigned int +0 (no field)
0.01% 0.00% long unsigned int +2 (no field)
2.73% 0.00% struct task_struct
1.71% 0.00% struct task_struct +52 (on_cpu)
0.38% 0.00% struct task_struct +2104 (rcu_read_unlock_special.b.blocked)
0.23% 0.00% struct task_struct +2100 (rcu_read_lock_nesting)
0.14% 0.00% struct task_struct +2384 ()
0.06% 0.00% struct task_struct +3096 (signal)
0.05% 0.00% struct task_struct +3616 (cgroups)
0.05% 0.00% struct task_struct +2344 (active_mm)
0.02% 0.00% struct task_struct +46 (flags)
0.02% 0.00% struct task_struct +2096 (migration_disabled)
0.01% 0.00% struct task_struct +24 (__state)
0.01% 0.00% struct task_struct +3956 (mm_cid_active)
0.01% 0.00% struct task_struct +1048 (cpus_ptr)
0.01% 0.00% struct task_struct +184 (se.group_node.next)
0.01% 0.00% struct task_struct +20 (thread_info.cpu)
0.00% 0.00% struct task_struct +104 (on_rq)
0.00% 0.00% struct task_struct +2456 (pid)
1.36% 0.00% struct module
0.59% 0.00% struct module +952 (kallsyms)
0.42% 0.00% struct module +0 (state)
0.23% 0.00% struct module +8 (list.next)
0.12% 0.00% struct module +216 (syms)
0.95% 0.00% struct inode
0.41% 0.00% struct inode +40 (i_sb)
0.22% 0.00% struct inode +0 (i_mode)
0.06% 0.00% struct inode +76 (i_rdev)
0.06% 0.00% struct inode +56 (i_security)
<SNIP>
perf top/report:
- Don't ignore job control, allowing control+Z + bg to work.
- Add s390 raw data interpretation for PAI (Processor Activity
Instrumentation) counters.
perf archive:
- Add new option '--all' to pack perf.data with DSOs.
- Add new option '--unpack' to expand tarballs.
Initialization speedups:
- Lazily initialize zstd streams to save memory when not using it.
- Lazily allocate/size mmap event copy.
- Lazy load kernel symbols in 'perf record'.
- Be lazier in allocating lost samples buffer in 'perf record'.
- Don't synthesize BPF events when disabled via the command line
(perf record --no-bpf-event).
Assorted improvements:
- Show note on AMD systems that the :p, :pp, :ppp and :P are all the
same, as IBS (Instruction Based Sampling) is used and it is
inherentely precise, not having levels of precision like in Intel
systems.
- When 'cycles' isn't available, fall back to the "task-clock" event
when not system wide, not to 'cpu-clock'.
- Add --debug-file option to redirect debug output, e.g.:
$ perf --debug-file /tmp/perf.log record -v true
- Shrink 'struct map' to under one cacheline by avoiding function
pointers for selecting if addresses are identity or DSO relative,
and using just a byte for some boolean struct members.
- Resolve the arch specific strerrno just once to use in
perf_env__arch_strerrno().
- Reduce memory for recording PERF_RECORD_LOST_SAMPLES event.
Assorted fixes:
- Fix the default 'perf top' usage on Intel hybrid systems, now it
starts with a browser showing the number of samples for Efficiency
(cpu_atom/cycles/P) and Performance (cpu_core/cycles/P). This
behaviour is similar on ARM64, with its respective set of
big.LITTLE processors.
- Fix segfault on build_mem_topology() error path.
- Fix 'perf mem' error on hybrid related to availability of mem event
in a PMU.
- Fix missing reference count gets (map, maps) in the db-export code.
- Avoid recursively taking env->bpf_progs.lock in the 'perf_env'
code.
- Use the newly introduced maps__for_each_map() to add missing
locking around iteration of 'struct map' entries.
- Parse NOTE segments until the build id is found, don't stop on the
first one, ELF files may have several such NOTE segments.
- Remove 'egrep' usage, its deprecated, use 'grep -E' instead.
- Warn first about missing libelf, not libbpf, that depends on
libelf.
- Use alternative to 'find ... -printf' as this isn't supported in
busybox.
- Address python 3.6 DeprecationWarning for string scapes.
- Fix memory leak in uniq() in libsubcmd.
- Fix man page formatting for 'perf lock'
- Fix some spelling mistakes.
perf tests:
- Fail shell tests that needs some symbol in perf itself if it is
stripped. These tests check if a symbol is resolved, if some hot
function is indeed detected by profiling, etc.
- The 'perf test sigtrap' test is currently failing on PREEMPT_RT,
skip it if sleeping spinlocks are detected (using BTF) and point to
the mailing list discussion about it. This test is also being
skipped on several architectures (powerpc, s390x, arm and aarch64)
due to other pending issues with intruction breakpoints.
- Adjust test case perf record offcpu profiling tests for s390.
- Fix 'Setup struct perf_event_attr' fails on s390 on z/VM guest,
addressing issues caused by the fallback from cycles to task-clock
done in this release.
- Fix mask for VG register in the user-regs test.
- Use shellcheck on 'perf test' shell scripts automatically to make
sure changes don't introduce things it flags as problematic.
- Add option to change objdump binary and allow it to be set via
'perf config'.
- Add basic 'perf script', 'perf list --json" and 'perf diff' tests.
- Basic branch counter support.
- Make DSO tests a suite rather than individual.
- Remove atomics from test_loop to avoid test failures.
- Fix call chain match on powerpc for the record+probe_libc_inet_pton
test.
- Improve Intel hybrid tests.
Vendor event files (JSON):
powerpc:
- Update datasource event name to fix duplicate events on IBM's
Power10.
- Add PVN for HX-C2000 CPU with Power8 Architecture.
Intel:
- Alderlake/rocketlake metric fixes.
- Update emeraldrapids events to v1.02.
- Update icelakex events to v1.23.
- Update sapphirerapids events to v1.17.
- Add skx, clx, icx and spr upi bandwidth metric.
AMD:
- Add Zen 4 memory controller events.
RISC-V:
- Add StarFive Dubhe-80 and Dubhe-90 JSON files.
https://www.starfivetech.com/en/site/cpu-u
- Add T-HEAD C9xx JSON file.
https://github.com/riscv-software-src/opensbi/blob/master/docs/platform/thead-c9xx.md
ARM64:
- Remove UTF-8 characters from cmn.json, that were causing build
failure in some distros.
- Add core PMU events and metrics for Ampere One X.
- Rename Ampere One's BPU_FLUSH_MEM_FAULT to GPC_FLUSH_MEM_FAULT
libperf:
- Rename several perf_cpu_map constructor names to clarify what they
really do.
- Ditto for some other methods, coping with some issues in their
semantics, like perf_cpu_map__empty() ->
perf_cpu_map__has_any_cpu_or_is_empty().
- Document perf_cpu_map__nr()'s behavior
perf stat:
- Exit if parse groups fails.
- Combine the -A/--no-aggr and --no-merge options.
- Fix help message for --metric-no-threshold option.
Hardware tracing:
ARM64 CoreSight:
- Bump minimum OpenCSD version to ensure a bugfix is present.
- Add 'T' itrace option for timestamp trace
- Set start vm addr of exectable file to 0 and don't ignore first
sample on the arm-cs-trace-disasm.py 'perf script'"
* tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (179 commits)
MAINTAINERS: Add Namhyung as tools/perf/ co-maintainer
perf test: test case 'Setup struct perf_event_attr' fails on s390 on z/vm
perf db-export: Fix missing reference count get in call_path_from_sample()
perf tests: Add perf script test
libsubcmd: Fix memory leak in uniq()
perf TUI: Don't ignore job control
perf vendor events intel: Update sapphirerapids events to v1.17
perf vendor events intel: Update icelakex events to v1.23
perf vendor events intel: Update emeraldrapids events to v1.02
perf vendor events intel: Alderlake/rocketlake metric fixes
perf x86 test: Add hybrid test for conflicting legacy/sysfs event
perf x86 test: Update hybrid expectations
perf vendor events amd: Add Zen 4 memory controller events
perf stat: Fix hard coded LL miss units
perf record: Reduce memory for recording PERF_RECORD_LOST_SAMPLES event
perf env: Avoid recursively taking env->bpf_progs.lock
perf annotate: Add --insn-stat option for debugging
perf annotate: Add --type-stat option for debugging
perf annotate: Support event group display
perf annotate: Add --data-type option
...
249 files changed, 7843 insertions, 2141 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 2b90c5e28100..04187a22806a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17140,10 +17140,10 @@ PERFORMANCE EVENTS SUBSYSTEM M: Peter Zijlstra <peterz@infradead.org> M: Ingo Molnar <mingo@redhat.com> M: Arnaldo Carvalho de Melo <acme@kernel.org> +M: Namhyung Kim <namhyung@kernel.org> R: Mark Rutland <mark.rutland@arm.com> R: Alexander Shishkin <alexander.shishkin@linux.intel.com> R: Jiri Olsa <jolsa@kernel.org> -R: Namhyung Kim <namhyung@kernel.org> R: Ian Rogers <irogers@google.com> R: Adrian Hunter <adrian.hunter@intel.com> L: linux-perf-users@vger.kernel.org diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 934e2777a2db..64df118376df 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -32,6 +32,7 @@ FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ dwarf_getlocations \ + dwarf_getcfi \ eventfd \ fortify-source \ get_current_dir_name \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index dad79ede4e0a..37722e509eb9 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -7,6 +7,7 @@ FILES= \ test-bionic.bin \ test-dwarf.bin \ test-dwarf_getlocations.bin \ + test-dwarf_getcfi.bin \ test-eventfd.bin \ test-fortify-source.bin \ test-get_current_dir_name.bin \ @@ -154,6 +155,9 @@ $(OUTPUT)test-dwarf.bin: $(OUTPUT)test-dwarf_getlocations.bin: $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-dwarf_getcfi.bin: + $(BUILD) $(DWARFLIBS) + $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c new file mode 100644 index 000000000000..50e7d7cb7bdf --- /dev/null +++ b/tools/build/feature/test-dwarf_getcfi.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <elfutils/libdw.h> + +int main(void) +{ + Dwarf *dwarf = NULL; + return dwarf_getcfi(dwarf) == NULL; +} diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index eb6303ff446e..4cfcef9da3e4 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) +#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 1.1.1 is required" +#error "OpenCSD >= 1.2.1 is required" #endif int main(void) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 39c6a250dd1b..3a64499b0f5d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -235,6 +237,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -982,6 +986,12 @@ enum perf_event_type { * { u64 nr; * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX * { u64 from, to, flags } lbr[nr]; + * # + * # The format of the counters is decided by the + * # "branch_counter_nr" and "branch_counter_width", + * # which are defined in the ABI. + * # + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi @@ -1427,6 +1437,9 @@ struct perf_branch_entry { reserved:31; }; +/* Size of used info bits in struct perf_branch_entry */ +#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 + union perf_sample_weight { __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 5cb0eeec2c8a..337fde770e45 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -16,6 +16,7 @@ #include <sys/mount.h> #include "fs.h" +#include "../io.h" #include "debug-internal.h" #define _STR(x) #x @@ -344,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value) return filename__read_ull_base(filename, value, 0); } -#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ - int filename__read_str(const char *filename, char **buf, size_t *sizep) { - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; + struct io io; + char bf[128]; + int err; - fd = open(filename, O_RDONLY); - if (fd < 0) + io.fd = open(filename, O_RDONLY); + if (io.fd < 0) return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warn("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; + io__init(&io, io.fd, bf, sizeof(bf)); + *buf = NULL; + err = io__getdelim(&io, buf, sizep, /*delim=*/-1); + if (err < 0) { + free(*buf); + *buf = NULL; } else - free(bf); - - close(fd); + err = 0; + close(io.fd); return err; } @@ -475,15 +447,22 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep) int sysfs__read_bool(const char *entry, bool *value) { - char *buf; - size_t size; - int ret; + struct io io; + char bf[16]; + int ret = 0; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; - ret = sysfs__read_str(entry, &buf, &size); - if (ret < 0) - return ret; + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + io.fd = open(path, O_RDONLY); + if (io.fd < 0) + return -errno; - switch (buf[0]) { + io__init(&io, io.fd, bf, sizeof(bf)); + switch (io__get_char(&io)) { case '1': case 'y': case 'Y': @@ -497,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value) default: ret = -1; } - - free(buf); + close(io.fd); return ret; } diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index a77b74c5fb65..84adf8102018 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -12,6 +12,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <linux/types.h> struct io { /* File descriptor being read/ */ @@ -140,8 +141,8 @@ static inline int io__get_dec(struct io *io, __u64 *dec) } } -/* Read up to and including the first newline following the pattern of getline. */ -static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +/* Read up to and including the first delim. */ +static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim) { char buf[128]; int buf_pos = 0; @@ -151,7 +152,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l /* TODO: reuse previously allocated memory. */ free(*line_out); - while (ch != '\n') { + while (ch != delim) { ch = io__get_char(io); if (ch < 0) @@ -184,4 +185,9 @@ err_out: return -ENOMEM; } +static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +{ + return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n'); +} + #endif /* __API_IO__ */ diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c index 8e1a926a9cfe..bc142f0664b5 100644 --- a/tools/lib/perf/Documentation/examples/sampling.c +++ b/tools/lib/perf/Documentation/examples/sampling.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { fprintf(stderr, "failed to create cpus\n"); return -1; diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d6ca24f6ef78..2378980fab8a 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs: [source,c] -- - 42 cpus = perf_cpu_map__new(NULL); + 42 cpus = perf_cpu_map__new_online_cpus(); 43 if (!cpus) { 44 fprintf(stderr, "failed to create cpus\n"); 45 return -1; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index a8f1a237931b..fcfb9499ef9c 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -37,7 +37,7 @@ SYNOPSIS struct perf_cpu_map; - struct perf_cpu_map *perf_cpu_map__dummy_new(void); + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); struct perf_cpu_map *perf_cpu_map__read(FILE *file); struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); @@ -46,7 +46,7 @@ SYNOPSIS void perf_cpu_map__put(struct perf_cpu_map *map); int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); int perf_cpu_map__nr(const struct perf_cpu_map *cpus); - bool perf_cpu_map__empty(const struct perf_cpu_map *map); + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 2a5a29217374..4adcd7920d03 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -9,6 +9,7 @@ #include <unistd.h> #include <ctype.h> #include <limits.h> +#include "internal.h" void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { @@ -27,7 +28,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) return result; } -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +struct perf_cpu_map *perf_cpu_map__new_any_cpu(void) { struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); @@ -66,15 +67,21 @@ void perf_cpu_map__put(struct perf_cpu_map *map) } } -static struct perf_cpu_map *cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysconf(void) { struct perf_cpu_map *cpus; - int nr_cpus; + int nr_cpus, nr_cpus_conf; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus < 0) return NULL; + nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus != nr_cpus_conf) { + pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", + nr_cpus, nr_cpus_conf, nr_cpus); + } + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; @@ -86,9 +93,27 @@ static struct perf_cpu_map *cpu_map__default_new(void) return cpus; } -struct perf_cpu_map *perf_cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysfs_online(void) { - return cpu_map__default_new(); + struct perf_cpu_map *cpus = NULL; + FILE *onlnf; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (onlnf) { + cpus = perf_cpu_map__read(onlnf); + fclose(onlnf); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__new_online_cpus(void) +{ + struct perf_cpu_map *cpus = cpu_map__new_sysfs_online(); + + if (cpus) + return cpus; + + return cpu_map__new_sysconf(); } @@ -180,27 +205,11 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else - cpus = cpu_map__default_new(); out_free_tmp: free(tmp_cpus); return cpus; } -static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) -{ - struct perf_cpu_map *cpus = NULL; - FILE *onlnf; - - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); - return cpus; -} - struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) { struct perf_cpu_map *cpus = NULL; @@ -211,7 +220,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) int max_entries = 0; if (!cpu_list) - return cpu_map__read_all_cpu_map(); + return perf_cpu_map__new_online_cpus(); /* * must handle the case of empty cpumap to cover @@ -268,10 +277,12 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') - cpus = cpu_map__default_new(); - else - cpus = perf_cpu_map__dummy_new(); + else if (*cpu_list != '\0') { + pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", + cpu_list); + cpus = perf_cpu_map__new_online_cpus(); + } else + cpus = perf_cpu_map__new_any_cpu(); invalid: free(tmp_cpus); out: @@ -300,7 +311,7 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) return cpus ? __perf_cpu_map__nr(cpus) : 1; } -bool perf_cpu_map__empty(const struct perf_cpu_map *map) +bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) { return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; } diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 3acbbccc1901..058e3ff10f9b 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (evsel->system_wide) { /* System wide: set the cpu map of the evsel to all online CPUs. */ perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__new(NULL); + evsel->cpus = perf_cpu_map__new_online_cpus(); } else if (evlist->has_user_cpus && evsel->is_pmu_core) { /* * User requested CPUs on a core PMU, ensure the requested CPUs @@ -619,7 +619,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) /* One for each CPU */ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); - if (perf_cpu_map__empty(evlist->all_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) { /* Plus one for each thread */ nr_mmaps += perf_thread_map__nr(evlist->threads); /* Minus the per-thread CPU (-1) */ @@ -653,7 +653,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return mmap_per_thread(evlist, ops, mp); return mmap_per_cpu(evlist, ops, mp); diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8b51b008a81f..c07160953224 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, static struct perf_cpu_map *empty_cpu_map; if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5a062af8e9d8..5f08cab61ece 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -33,7 +33,8 @@ struct perf_mmap { bool overwrite; u64 flush; libperf_unmap_cb_t unmap_cb; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *event_copy; + size_t event_copy_sz; struct perf_mmap *next; }; diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index e38d859a384d..228c6c629b0c 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -19,10 +19,23 @@ struct perf_cache { struct perf_cpu_map; /** - * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value. + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); +/** + * perf_cpu_map__new_online_cpus - a map read from + * /sys/devices/system/cpu/online if + * available. If reading wasn't possible a map + * is created using the online processors + * assuming the first 'n' processors are all + * online. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); +/** + * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no + * cpu_list argument is provided then + * perf_cpu_map__new_online_cpus is returned. */ -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); @@ -31,12 +44,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); +/** + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index + * is invalid. + */ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +/** + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a + * cpu of -1 for an invalid index, this makes an empty map + * look like it contains the "any CPU"/dummy value. Otherwise + * the result is the number CPUs in the map plus one if the + * "any CPU"/dummy value is present. + */ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); /** - * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value. + * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. */ -LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, @@ -51,6 +75,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map); (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \ + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \ + (idx) < perf_cpu_map__nr(cpus); \ + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \ + if ((_cpu).cpu != -1) + #define perf_cpu_map__for_each_idx(idx, cpus) \ for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 190b56ae923a..10b3f3722642 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -1,15 +1,15 @@ LIBPERF_0.0.1 { global: libperf_init; - perf_cpu_map__dummy_new; - perf_cpu_map__default_new; + perf_cpu_map__new_any_cpu; + perf_cpu_map__new_online_cpus; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; perf_cpu_map__read; perf_cpu_map__nr; perf_cpu_map__cpu; - perf_cpu_map__empty; + perf_cpu_map__has_any_cpu_or_is_empty; perf_cpu_map__max; perf_cpu_map__has; perf_thread_map__new_array; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 2184814b37dd..0c903c2372c9 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -19,6 +19,7 @@ void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb) { + /* Assume fields were zero initialized. */ map->fd = -1; map->overwrite = overwrite; map->unmap_cb = unmap_cb; @@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, void perf_mmap__munmap(struct perf_mmap *map) { - if (map && map->base != NULL) { + if (!map) + return; + + zfree(&map->event_copy); + map->event_copy_sz = 0; + if (map->base) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; refcount_set(&map->refcnt, 0); } - if (map && map->unmap_cb) + if (map->unmap_cb) map->unmap_cb(map); } @@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, */ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; + unsigned int len = size, cpy; void *dst = map->event_copy; + if (size > map->event_copy_sz) { + dst = realloc(map->event_copy, size); + if (!dst) + return NULL; + map->event_copy = dst; + map->event_copy_sz = size; + } + do { cpy = min(map->mask + 1 - (offset & map->mask), len); memcpy(dst, &data[offset & map->mask], cpy); diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index 87b0510a556f..c998b1dae863 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); if (!cpus) return -1; @@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); - cpus = perf_cpu_map__default_new(); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return -1; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ed616fc19b4f..10f70cb41ff1 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -46,7 +46,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); @@ -261,7 +261,7 @@ static int test_mmap_thread(void) threads = perf_thread_map__new_dummy(); __T("failed to create threads", threads); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); __T("failed to create cpus", cpus); perf_thread_map__set_pid(threads, 0, pid); @@ -350,7 +350,7 @@ static int test_mmap_cpus(void) attr.config = id; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index a11fc51bfb68..545ec3150546 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -27,7 +27,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evsel = perf_evsel__new(&attr); diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index adfbae27dc36..8561b0f01a24 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -52,11 +52,21 @@ void uniq(struct cmdnames *cmds) if (!cmds->cnt) return; - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - + for (i = 1; i < cmds->cnt; i++) { + if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + zfree(&cmds->names[i - 1]); + } + for (i = 0, j = 0; i < cmds->cnt; i++) { + if (cmds->names[i]) { + if (i == j) + j++; + else + cmds->names[j++] = cmds->names[i]; + } + } cmds->cnt = j; + while (j < i) + cmds->names[j++] = NULL; } void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f533e76fb480..f5b81d439387 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,6 +39,9 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log +tests/shell/*.shellcheck_log +tests/shell/coresight/*.shellcheck_log +tests/shell/lib/*.shellcheck_log feature/ libapi/ libbpf/ @@ -49,3 +52,4 @@ libtraceevent/ libtraceevent_plugins/ fixdep Documentation/doc.dep +python_ext_build/ diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index a97f95825b14..19cc179be9a7 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -25,6 +25,7 @@ q quicker (less detailed) decoding A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) + T use the timestamp trace as kernel time The default is all events i.e. the same as --itrace=iybxwpe, except for perf script where it is --itrace=ce diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index fe168e8165c8..b95524bea021 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -155,6 +155,17 @@ include::itrace.txt[] stdio or stdio2 (Default: 0). Note that this is about selection of functions to display, not about lines within the function. +--data-type[=TYPE_NAME]:: + Display data type annotation instead of code. It infers data type of + samples (if they are memory accessing instructions) using DWARF debug + information. It can take an optional argument of data type name. In + that case it'd show annotation for the type only, otherwise it'd show + all data types it finds. + +--type-stat:: + Show stats for the data type annotation. + + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 0b4e79dbd3f6..379f9d7a8ab1 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -251,7 +251,8 @@ annotate.*:: addr2line binary to use for file names and line numbers. annotate.objdump:: - objdump binary to use for disassembly and annotations. + objdump binary to use for disassembly and annotations, + including in the 'perf test' command. annotate.disassembler_style:: Use this to change the default disassembler style to some other value @@ -722,7 +723,6 @@ session-<NAME>.*:: Defines new record session for daemon. The value is record's command line without the 'record' keyword. - SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index d5f78e125efe..1b90575ee3c8 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -81,11 +81,13 @@ For Intel systems precise event sampling is implemented with PEBS which supports up to precise-level 2, and precise level 3 for some special cases -On AMD systems it is implemented using IBS (up to precise-level 2). -The precise modifier works with event types 0x76 (cpu-cycles, CPU -clocks not halted) and 0xC1 (micro-ops retired). Both events map to -IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see the +On AMD systems it is implemented using IBS OP (up to precise-level 2). +Unlike Intel PEBS which provides levels of precision, AMD core pmu is +inherently non-precise and IBS is inherently precise. (i.e. ibs_op//, +ibs_op//p, ibs_op//pp and ibs_op//ppp are all same). The precise modifier +works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 +(micro-ops retired). Both events map to IBS execution sampling (IBS op) +with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) section of the [AMD Processor Programming Reference (PPR)] relevant to the family, model and stepping of the processor being used). diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 503abcba1438..f5938d616d75 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -119,7 +119,7 @@ INFO OPTIONS CONTENTION OPTIONS --------------- +------------------ -k:: --key=<value>:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1889f66addf2..6015fdd08fb6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -445,6 +445,10 @@ following filters are defined: 4th-Gen Xeon+ server), the save branch type is unconditionally enabled when the taken branch stack sampling is enabled. - priv: save privilege state during sampling in case binary is not available later + - counter: save occurrences of the event since the last branch entry. Currently, the + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and + later platforms. An error out is expected if it's used on the unsupported + kernel or CPUs. + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index af068b4f1e5a..38f59ac064f7 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -118,6 +118,9 @@ OPTIONS - retire_lat: On X86, this reports pipeline stall of this instruction compared to the previous instruction in cycles. And currently supported only on X86 - simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate + - type: Data type of sample memory access. + - typeoff: Offset in the data type of sample memory access. + - symoff: Offset in the symbol. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8f789fa1242e..5af2e432b54f 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -422,7 +422,34 @@ See perf list output for the possible metrics and metricgroups. -A:: --no-aggr:: -Do not aggregate counts across all monitored CPUs. +--no-merge:: +Do not aggregate/merge counts across monitored CPUs or PMUs. + +When multiple events are created from a single event specification, +stat will, by default, aggregate the event counts and show the result +in a single row. This option disables that behavior and shows the +individual events and counts. + +Multiple events are created from a single event specification when: + +1. PID monitoring isn't requested and the system has more than one + CPU. For example, a system with 8 SMT threads will have one event + opened on each thread and aggregation is performed across them. + +2. Prefix or glob wildcard matching is used for the PMU name. For + example, multiple memory controller PMUs may exist typically with a + suffix of _0, _1, etc. By default the event counts will all be + combined if the PMU is specified without the suffix such as + uncore_imc rather than uncore_imc_0. + +3. Aliases, which are listed immediately after the Kernel PMU events + by perf list, are used. + +--hybrid-merge:: +Merge core event counts from all core PMUs. In hybrid or big.LITTLE +systems by default each core PMU will report its count +separately. This option forces core PMU counts to be combined to give +a behavior closer to having a single CPU type in the system. --topdown:: Print top-down metrics supported by the CPU. This allows to determine @@ -475,29 +502,6 @@ highlight 'tma_frontend_bound'. This metric may be drilled into with Error out if the input is higher than the supported max level. ---no-merge:: -Do not merge results from same PMUs. - -When multiple events are created from a single event specification, -stat will, by default, aggregate the event counts and show the result -in a single row. This option disables that behavior and shows -the individual events and counts. - -Multiple events are created from a single event specification when: -1. Prefix or glob matching is used for the PMU name. -2. Aliases, which are listed immediately after the Kernel PMU events - by perf list, are used. - ---hybrid-merge:: -Merge the hybrid event counts from all PMUs. - -For hybrid events, by default, the stat aggregates and reports the event -counts per PMU. But sometimes, it's also useful to aggregate event counts -from all PMUs. This option enables that behavior and reports the counts -without PMUs. - -For non-hybrid events, it should be no effect. - --smi-cost:: Measure SMI cost if msr/aperf/ and msr/smi/ events are supported. diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index ba3df49c169d..a7cf7bc2f968 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -64,6 +64,9 @@ OPTIONS perf-event-open - Print perf_event_open() arguments and return value +--debug-file:: + Write debug output to a specified file. + DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b3e6ed10f40c..aa55850fbc21 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -476,6 +476,11 @@ else else CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT endif # dwarf_getlocations + ifneq ($(feature-dwarf_getcfi), 1) + msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142); + else + CFLAGS += -DHAVE_DWARF_CFI_SUPPORT + endif # dwarf_getcfi endif # Dwarf support endif # libelf support endif # NO_LIBELF @@ -680,15 +685,15 @@ ifndef BUILD_BPF_SKEL endif ifeq ($(BUILD_BPF_SKEL),1) - ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) - dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) - BUILD_BPF_SKEL := 0 - else ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) + ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) dummy := $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool) BUILD_BPF_SKEL := 0 else ifeq ($(filter -DHAVE_ZLIB_SUPPORT, $(CFLAGS)),) dummy := $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool) BUILD_BPF_SKEL := 0 + else ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) + dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) + BUILD_BPF_SKEL := 0 else ifeq ($(call get-executable,$(CLANG)),) dummy := $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing) BUILD_BPF_SKEL := 0 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 058c9aecf608..27e7c478880f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -134,6 +134,8 @@ include ../scripts/utilities.mak # x86 instruction decoder - new instructions test # # Define GEN_VMLINUX_H to generate vmlinux.h from the BTF. +# +# Define NO_SHELLCHECK if you do not want to run shellcheck during build # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -227,8 +229,15 @@ else force_fixdep := $(config) endif +# Runs shellcheck on perf test shell scripts +ifeq ($(NO_SHELLCHECK),1) + SHELLCHECK := +else + SHELLCHECK := $(shell which shellcheck 2> /dev/null) +endif + export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR HOSTCFLAGS +export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK include $(srctree)/tools/build/Makefile.include @@ -1152,7 +1161,7 @@ bpf-skel-clean: clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) - $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete -o -name '*.shellcheck_log' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2cf873d71dff..77e6663c1703 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -199,7 +199,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr, { int i, err = -EINVAL; struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); /* Set option of each CPU we have */ for (i = 0; i < cpu__max_cpu().cpu; i++) { @@ -211,7 +211,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr, * program can run on any CPUs in this case, thus don't skip * validation. */ - if (!perf_cpu_map__empty(event_cpus) && + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) && !perf_cpu_map__has(event_cpus, cpu)) continue; @@ -435,7 +435,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * Also the case of per-cpu mmaps, need the contextID in order to be notified * when a context switch happened. */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, "timestamp", 1); evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, @@ -461,7 +461,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(evsel, TIME); err = cs_etm_validate_config(itr, cs_etm_evsel); @@ -536,10 +536,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, int i; int etmv3 = 0, etmv4 = 0, ete = 0; struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); /* cpu map is not empty, we have specific CPUs to work with */ - if (!perf_cpu_map__empty(event_cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { for (i = 0; i < cpu__max_cpu().cpu; i++) { struct perf_cpu cpu = { .cpu = i, }; @@ -802,7 +802,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, u64 nr_cpu, type; struct perf_cpu_map *cpu_map; struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; @@ -814,7 +814,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return -EINVAL; /* If the cpu_map is empty all online CPUs are involved */ - if (perf_cpu_map__empty(event_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { cpu_map = online_cpus; } else { /* Make sure all specified CPUs are online */ diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index e3acc739bd00..51ccbfd3d246 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, sample CPU for AUX event; * also enable the timestamp tracing for samples correlation. */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(arm_spe_evsel, CPU); evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, "ts_enable", 1); @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); evsel__set_sample_bit(tracking_evsel, CPU); diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index a2eef9ec5491..97037499152e 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -57,7 +57,7 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) int get_cpuid(char *buf, size_t sz) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); int ret; if (!cpus) diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index 98e19c5366ac..21cc7e4149f7 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -61,10 +61,10 @@ static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, st const char *c = strchr(ops->raw, '#'); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) c = NULL; if (c++ != NULL) diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c index eb152770f148..40f5d17fedab 100644 --- a/tools/perf/arch/x86/tests/hybrid.c +++ b/tools/perf/arch/x86/tests/hybrid.c @@ -47,7 +47,7 @@ static int test__hybrid_hw_group_event(struct evlist *evlist) evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); return TEST_OK; } @@ -102,7 +102,7 @@ static int test__hybrid_group_modifier1(struct evlist *evlist) evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -163,6 +163,24 @@ static int test__checkevent_pmu(struct evlist *evlist) return TEST_OK; } +static int test__hybrid_hw_group_event_2(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == 0x3c); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); + return TEST_OK; +} + struct evlist_test { const char *name; bool (*valid)(void); @@ -171,27 +189,27 @@ struct evlist_test { static const struct evlist_test test__hybrid_events[] = { { - .name = "cpu_core/cpu-cycles/", + .name = "cpu_core/cycles/", .check = test__hybrid_hw_event_with_pmu, /* 0 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", + .name = "{cpu_core/cycles/,cpu_core/branches/}", .check = test__hybrid_hw_group_event, /* 1 */ }, { - .name = "{cpu-clock,cpu_core/cpu-cycles/}", + .name = "{cpu-clock,cpu_core/cycles/}", .check = test__hybrid_sw_hw_group_event, /* 2 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu-clock}", + .name = "{cpu_core/cycles/,cpu-clock}", .check = test__hybrid_hw_sw_group_event, /* 3 */ }, { - .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", + .name = "{cpu_core/cycles/k,cpu_core/branches/u}", .check = test__hybrid_group_modifier1, /* 4 */ }, @@ -215,6 +233,11 @@ static const struct evlist_test test__hybrid_events[] = { .check = test__hybrid_cache_event, /* 8 */ }, + { + .name = "{cpu_core/cycles/,cpu_core/cpu-cycles/}", + .check = test__hybrid_hw_group_event_2, + /* 9 */ + }, }; static int test_event(const struct evlist_test *e) diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index 530934805710..399c4a0a29d8 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -113,3 +113,41 @@ int regs_query_register_offset(const char *name) return roff->offset; return -EINVAL; } + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_arch_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 5741ffe47312..e65b7dbe27fb 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -14,66 +14,79 @@ #if defined(__x86_64__) -int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) +struct perf_event__synthesize_extra_kmaps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data) { - int rc = 0; - struct map_rb_node *pos; - struct maps *kmaps = machine__kernel_maps(machine); - union perf_event *event = zalloc(sizeof(event->mmap) + - machine->id_hdr_size); + struct perf_event__synthesize_extra_kmaps_cb_args *args = data; + union perf_event *event = args->event; + struct kmap *kmap; + size_t size; - if (!event) { - pr_debug("Not enough memory synthesizing mmap event " - "for extra kernel maps\n"); - return -1; - } + if (!__map__is_extra_kernel_map(map)) + return 0; - maps__for_each_entry(kmaps, pos) { - struct kmap *kmap; - size_t size; - struct map *map = pos->map; + kmap = map__kmap(map); - if (!__map__is_extra_kernel_map(map)) - continue; + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + + args->machine->id_hdr_size; - kmap = map__kmap(map); + memset(event, 0, size); - size = sizeof(event->mmap) - sizeof(event->mmap.filename) + - PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + - machine->id_hdr_size; + event->mmap.header.type = PERF_RECORD_MMAP; - memset(event, 0, size); + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(args->machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = size; - /* - * kernel uses 0 for user space maps, see kernel/perf_event.c - * __perf_event_mmap - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; - else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pgoff = map__pgoff(map); + event->mmap.pid = args->machine->pid; - event->mmap.header.size = size; + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pgoff = map__pgoff(map); - event->mmap.pid = machine->pid; + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; - strlcpy(event->mmap.filename, kmap->name, PATH_MAX); + return 0; +} - if (perf_tool__process_synth_event(tool, event, machine, - process) != 0) { - rc = -1; - break; - } +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int rc; + struct maps *kmaps = machine__kernel_maps(machine); + struct perf_event__synthesize_extra_kmaps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + .event = zalloc(sizeof(args.event->mmap) + machine->id_hdr_size), + }; + + if (!args.event) { + pr_debug("Not enough memory synthesizing mmap event " + "for extra kernel maps\n"); + return -1; } - free(event); + rc = maps__for_each_map(kmaps, perf_event__synthesize_extra_kmaps_cb, &args); + + free(args.event); return rc; } diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d2c8cac11470..af8ae4647585 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; - if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) { + if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); return -EINVAL; } @@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, we need the CPU on the * AUX event. */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(intel_bts_evsel, CPU); } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index fa0c718b9e72..d199619df3ab 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, ui__warning("Intel Processor Trace: TSC not available\n"); } - per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus); + per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus); auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; @@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Per-cpu recording needs sched_switch events to distinguish different * threads. */ - if (have_timing_info && !perf_cpu_map__empty(cpus) && + if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && !record_opts__no_switch_events(opts)) { if (perf_can_record_switch_events()) { bool cpu_wide = !target__none(&opts->target) && @@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, we need the CPU on the * AUX event. */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(intel_pt_evsel, CPU); } @@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->immediate = true; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); /* And the CPU for switch events */ evsel__set_sample_bit(tracking_evsel, CPU); @@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Warn the user when we do not have enough information to decode i.e. * per-cpu with no sched_switch (except workload-only). */ - if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && + if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && !target__none(&opts->target) && !intel_pt_evsel->core.attr.exclude_user) ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 6bfffe83dde9..d3db73dac66a 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index cb5174b53940..06bb3187660a 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -444,7 +444,7 @@ int bench_epoll_wait(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2005a3fa3026..0c69d20efa32 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -138,7 +138,7 @@ int bench_futex_hash(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 092cbd52db82..7a4973346180 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -172,7 +172,7 @@ int bench_futex_lock_pi(int argc, const char **argv) if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index c0035990a33c..d9ad736c1a3e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -174,7 +174,7 @@ int bench_futex_requeue(int argc, const char **argv) if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "cpu_map__new"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 5ab0234d74e6..b66df553e561 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -264,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "mlockall"); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 18a5894af8bb..690fd6d3da13 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -149,7 +149,7 @@ int bench_futex_wake(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c index a01c40131493..269c1f4a6852 100644 --- a/tools/perf/bench/sched-seccomp-notify.c +++ b/tools/perf/bench/sched-seccomp-notify.c @@ -32,7 +32,7 @@ static bool sync_mode; static const struct option options[] = { OPT_U64('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('s', "sync-mode", &sync_mode, - "Enable the synchronious mode for seccomp notifications"), + "Enable the synchronous mode for seccomp notifications"), OPT_END() }; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index aeeb801f1ed7..6c1cc797692d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -20,6 +20,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/annotate.h" +#include "util/annotate-data.h" #include "util/event.h" #include <subcmd/parse-options.h> #include "util/parse-events.h" @@ -45,7 +46,6 @@ struct perf_annotate { struct perf_tool tool; struct perf_session *session; - struct annotation_options opts; #ifdef HAVE_SLANG_SUPPORT bool use_tui; #endif @@ -56,9 +56,13 @@ struct perf_annotate { bool skip_missing; bool has_br_stack; bool group_set; + bool data_type; + bool type_stat; + bool insn_stat; float min_percent; const char *sym_hist_filter; const char *cpu_list; + const char *target_data_type; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; @@ -94,6 +98,7 @@ static void process_basic_block(struct addr_map_symbol *start, struct annotation *notes = sym ? symbol__annotation(sym) : NULL; struct block_range_iter iter; struct block_range *entry; + struct annotated_branch *branch; /* * Sanity; NULL isn't executable and the CPU cannot execute backwards @@ -105,6 +110,8 @@ static void process_basic_block(struct addr_map_symbol *start, if (!block_range_iter__valid(&iter)) return; + branch = annotation__get_branch(notes); + /* * First block in range is a branch target. */ @@ -118,8 +125,8 @@ static void process_basic_block(struct addr_map_symbol *start, entry->coverage++; entry->sym = sym; - if (notes) - notes->max_coverage = max(notes->max_coverage, entry->coverage); + if (branch) + branch->max_coverage = max(branch->max_coverage, entry->coverage); } while (block_range_iter__next(&iter)); @@ -315,9 +322,153 @@ static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_annotate *ann) { if (!ann->use_stdio2) - return symbol__tty_annotate(&he->ms, evsel, &ann->opts); + return symbol__tty_annotate(&he->ms, evsel); + + return symbol__tty_annotate2(&he->ms, evsel); +} + +static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel) +{ + struct dso *dso = map__dso(he->ms.map); + int nr_members = 1; + int nr_samples = he->stat.nr_events; + + if (evsel__is_group_event(evsel)) { + struct hist_entry *pair; + + list_for_each_entry(pair, &he->pairs.head, pairs.node) + nr_samples += pair->stat.nr_events; + } + + printf("Annotate type: '%s' in %s (%d samples):\n", + he->mem_type->self.type_name, dso->name, nr_samples); + + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + int i = 0; + + for_each_group_evsel(pos, evsel) + printf(" event[%d] = %s\n", i++, pos->name); + + nr_members = evsel->core.nr_members; + } + + printf("============================================================================\n"); + printf("%*s %10s %10s %s\n", 11 * nr_members, "samples", "offset", "size", "field"); +} + +static void print_annotated_data_type(struct annotated_data_type *mem_type, + struct annotated_member *member, + struct evsel *evsel, int indent) +{ + struct annotated_member *child; + struct type_hist *h = mem_type->histograms[evsel->core.idx]; + int i, nr_events = 1, samples = 0; + + for (i = 0; i < member->size; i++) + samples += h->addr[member->offset + i].nr_samples; + printf(" %10d", samples); - return symbol__tty_annotate2(&he->ms, evsel, &ann->opts); + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + + for_each_group_member(pos, evsel) { + h = mem_type->histograms[pos->core.idx]; + + samples = 0; + for (i = 0; i < member->size; i++) + samples += h->addr[member->offset + i].nr_samples; + printf(" %10d", samples); + } + nr_events = evsel->core.nr_members; + } + + printf(" %10d %10d %*s%s\t%s", + member->offset, member->size, indent, "", member->type_name, + member->var_name ?: ""); + + if (!list_empty(&member->children)) + printf(" {\n"); + + list_for_each_entry(child, &member->children, node) + print_annotated_data_type(mem_type, child, evsel, indent + 4); + + if (!list_empty(&member->children)) + printf("%*s}", 11 * nr_events + 24 + indent, ""); + printf(";\n"); +} + +static void print_annotate_data_stat(struct annotated_data_stat *s) +{ +#define PRINT_STAT(fld) if (s->fld) printf("%10d : %s\n", s->fld, #fld) + + int bad = s->no_sym + + s->no_insn + + s->no_insn_ops + + s->no_mem_ops + + s->no_reg + + s->no_dbginfo + + s->no_cuinfo + + s->no_var + + s->no_typeinfo + + s->invalid_size + + s->bad_offset; + int ok = s->total - bad; + + printf("Annotate data type stats:\n"); + printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n", + s->total, ok, 100.0 * ok / (s->total ?: 1), bad, 100.0 * bad / (s->total ?: 1)); + printf("-----------------------------------------------------------\n"); + PRINT_STAT(no_sym); + PRINT_STAT(no_insn); + PRINT_STAT(no_insn_ops); + PRINT_STAT(no_mem_ops); + PRINT_STAT(no_reg); + PRINT_STAT(no_dbginfo); + PRINT_STAT(no_cuinfo); + PRINT_STAT(no_var); + PRINT_STAT(no_typeinfo); + PRINT_STAT(invalid_size); + PRINT_STAT(bad_offset); + printf("\n"); + +#undef PRINT_STAT +} + +static void print_annotate_item_stat(struct list_head *head, const char *title) +{ + struct annotated_item_stat *istat, *pos, *iter; + int total_good, total_bad, total; + int sum1, sum2; + LIST_HEAD(tmp); + + /* sort the list by count */ + list_splice_init(head, &tmp); + total_good = total_bad = 0; + + list_for_each_entry_safe(istat, pos, &tmp, list) { + total_good += istat->good; + total_bad += istat->bad; + sum1 = istat->good + istat->bad; + + list_for_each_entry(iter, head, list) { + sum2 = iter->good + iter->bad; + if (sum1 > sum2) + break; + } + list_move_tail(&istat->list, &iter->list); + } + total = total_good + total_bad; + + printf("Annotate %s stats\n", title); + printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n\n", total, + total_good, 100.0 * total_good / (total ?: 1), + total_bad, 100.0 * total_bad / (total ?: 1)); + printf(" %-10s: %5s %5s\n", "Name", "Good", "Bad"); + printf("-----------------------------------------------------------\n"); + list_for_each_entry(istat, head, list) + printf(" %-10s: %5d %5d\n", istat->name, istat->good, istat->bad); + printf("\n"); } static void hists__find_annotations(struct hists *hists, @@ -327,6 +478,11 @@ static void hists__find_annotations(struct hists *hists, struct rb_node *nd = rb_first_cached(&hists->entries), *next; int key = K_RIGHT; + if (ann->type_stat) + print_annotate_data_stat(&ann_data_stat); + if (ann->insn_stat) + print_annotate_item_stat(&ann_insn_stat, "Instruction"); + while (nd) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); struct annotation *notes; @@ -359,11 +515,38 @@ find_next: continue; } + if (ann->data_type) { + /* skip unknown type */ + if (he->mem_type->histograms == NULL) + goto find_next; + + if (ann->target_data_type) { + const char *type_name = he->mem_type->self.type_name; + + /* skip 'struct ' prefix in the type name */ + if (strncmp(ann->target_data_type, "struct ", 7) && + !strncmp(type_name, "struct ", 7)) + type_name += 7; + + /* skip 'union ' prefix in the type name */ + if (strncmp(ann->target_data_type, "union ", 6) && + !strncmp(type_name, "union ", 6)) + type_name += 6; + + if (strcmp(ann->target_data_type, type_name)) + goto find_next; + } + + print_annotated_data_header(he, evsel); + print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0); + printf("\n"); + goto find_next; + } + if (use_browser == 2) { int ret; int (*annotate)(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt); annotate = dlsym(perf_gtk_handle, @@ -373,14 +556,14 @@ find_next: return; } - ret = annotate(he, evsel, &ann->opts, NULL); + ret = annotate(he, evsel, NULL); if (!ret || !ann->skip_missing) return; /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); + key = hist_entry__tui_annotate(he, evsel, NULL); switch (key) { case -1: @@ -422,9 +605,9 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - if (!ann->opts.objdump_path) { + if (!annotate_opts.objdump_path) { ret = perf_env__lookup_objdump(&session->header.env, - &ann->opts.objdump_path); + &annotate_opts.objdump_path); if (ret) goto out; } @@ -457,8 +640,20 @@ static int __cmd_annotate(struct perf_annotate *ann) evsel__reset_sample_bit(pos, CALLCHAIN); evsel__output_resort(pos, NULL); - if (symbol_conf.event_group && !evsel__is_group_leader(pos)) + /* + * An event group needs to display other events too. + * Let's delay printing until other events are processed. + */ + if (symbol_conf.event_group) { + if (!evsel__is_group_leader(pos)) { + struct hists *leader_hists; + + leader_hists = evsel__hists(evsel__leader(pos)); + hists__match(leader_hists, hists); + hists__link(leader_hists, hists); + } continue; + } hists__find_annotations(hists, pos, ann); } @@ -469,6 +664,20 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } + /* Display group events together */ + evlist__for_each_entry(session->evlist, pos) { + struct hists *hists = evsel__hists(pos); + u32 nr_samples = hists->stats.nr_samples; + + if (nr_samples == 0) + continue; + + if (!symbol_conf.event_group || !evsel__is_group_leader(pos)) + continue; + + hists__find_annotations(hists, pos, ann); + } + if (use_browser == 2) { void (*show_annotations)(void); @@ -495,6 +704,17 @@ static int parse_percent_limit(const struct option *opt, const char *str, return 0; } +static int parse_data_type(const struct option *opt, const char *str, int unset) +{ + struct perf_annotate *ann = opt->value; + + ann->data_type = !unset; + if (str) + ann->target_data_type = strdup(str); + + return 0; +} + static const char * const annotate_usage[] = { "perf annotate [<options>]", NULL @@ -558,9 +778,9 @@ int cmd_annotate(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, + OPT_BOOLEAN('l', "print-line", &annotate_opts.print_lines, "print matching source lines (may be slow)"), - OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, + OPT_BOOLEAN('P', "full-paths", &annotate_opts.full_path, "Don't shorten the displayed pathnames"), OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), @@ -571,15 +791,15 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK(0, "symfs", NULL, "directory", "Look for files with symbols relative to this directory", symbol__config_symfs), - OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -598,7 +818,7 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", "'always' (default), 'never' or 'auto' only applicable to --stdio mode", stdio__config_color, "always"), - OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_CALLBACK(0, "percent-limit", &annotate, "percent", @@ -606,7 +826,13 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options\n" ITRACE_HELP, itrace_parse_synth_opts), - + OPT_CALLBACK_OPTARG(0, "data-type", &annotate, NULL, "name", + "Show data type annotate for the memory accesses", + parse_data_type), + OPT_BOOLEAN(0, "type-stat", &annotate.type_stat, + "Show stats for the data type annotation"), + OPT_BOOLEAN(0, "insn-stat", &annotate.insn_stat, + "Show instruction stats for the data type annotation"), OPT_END() }; int ret; @@ -614,13 +840,13 @@ int cmd_annotate(int argc, const char **argv) set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); - annotation_options__init(&annotate.opts); + annotation_options__init(); ret = hists__init(); if (ret < 0) return ret; - annotation_config__init(&annotate.opts); + annotation_config__init(); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { @@ -635,13 +861,13 @@ int cmd_annotate(int argc, const char **argv) } if (disassembler_style) { - annotate.opts.disassembler_style = strdup(disassembler_style); - if (!annotate.opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - annotate.opts.objdump_path = strdup(objdump_path); - if (!annotate.opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -650,7 +876,7 @@ int cmd_annotate(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&annotate.opts) < 0) + if (annotate_check_args() < 0) return -EINVAL; #ifdef HAVE_GTK2_SUPPORT @@ -660,6 +886,13 @@ int cmd_annotate(int argc, const char **argv) } #endif +#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT + if (annotate.data_type) { + pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); + return -ENOTSUP; + } +#endif + ret = symbol__validate_sym_arguments(); if (ret) return ret; @@ -702,6 +935,14 @@ int cmd_annotate(int argc, const char **argv) use_browser = 2; #endif + /* FIXME: only support stdio for now */ + if (annotate.data_type) { + use_browser = 0; + annotate_opts.annotate_src = false; + symbol_conf.annotate_data_member = true; + symbol_conf.annotate_data_sample = true; + } + setup_browser(true); /* @@ -709,7 +950,10 @@ int cmd_annotate(int argc, const char **argv) * symbol, we do not care about the processes in annotate, * set sort order to avoid repeated output. */ - sort_order = "dso,symbol"; + if (annotate.data_type) + sort_order = "dso,type"; + else + sort_order = "dso,symbol"; /* * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle @@ -731,7 +975,7 @@ out_delete: #ifndef NDEBUG perf_session__delete(annotate.session); #endif - annotation_options__exit(&annotate.opts); + annotation_options__exit(); return ret; } diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a4cf9de7a7b5..f78eea9e2153 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2320,7 +2320,7 @@ static int setup_nodes(struct perf_session *session) nodes[node] = set; /* empty node, skip */ - if (perf_cpu_map__empty(map)) + if (perf_cpu_map__has_any_cpu_or_is_empty(map)) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index ac2e6c75f912..eb30c8eca488 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -333,7 +333,7 @@ static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace) static int reset_tracing_cpu(void) { - struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus(); int ret; ret = set_tracing_cpumask(cpumap); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index c8cf2fdd9cff..eb3ef5c24b66 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2265,6 +2265,12 @@ int cmd_inject(int argc, const char **argv) "perf inject [<options>]", NULL }; + + if (!inject.itrace_synth_opts.set) { + /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ + symbol_conf.lazy_load_kernel_maps = true; + } + #ifndef HAVE_JITDUMP set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a3ff2f4edbaa..230461280e45 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -2285,8 +2285,10 @@ setup_args: else ev_name = strdup(contention_tracepoints[j].name); - if (!ev_name) + if (!ev_name) { + free(rec_argv); return -ENOMEM; + } rec_argv[i++] = "-e"; rec_argv[i++] = ev_name; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index dcf288a4fb9a..91e6828c38cc 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -270,7 +270,7 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused, static int record__aio_enabled(struct record *rec); static int record__comp_enabled(struct record *rec); -static size_t zstd_compress(struct perf_session *session, struct mmap *map, +static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, void *dst, size_t dst_size, void *src, size_t src_size); #ifdef HAVE_AIO_SUPPORT @@ -405,9 +405,13 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size */ if (record__comp_enabled(aio->rec)) { - size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, - mmap__mmap_len(map) - aio->size, - buf, size); + ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, + mmap__mmap_len(map) - aio->size, + buf, size); + if (compressed < 0) + return (int)compressed; + + size = compressed; } else { memcpy(aio->data + aio->size, buf, size); } @@ -633,7 +637,13 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) struct record *rec = to; if (record__comp_enabled(rec)) { - size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); + ssize_t compressed = zstd_compress(rec->session, map, map->data, + mmap__mmap_len(map), bf, size); + + if (compressed < 0) + return (int)compressed; + + size = compressed; bf = map->data; } @@ -1350,7 +1360,7 @@ static int record__open(struct record *rec) evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { - if (evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -1527,10 +1537,10 @@ static size_t process_comp_header(void *record, size_t increment) return size; } -static size_t zstd_compress(struct perf_session *session, struct mmap *map, +static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, void *dst, size_t dst_size, void *src, size_t src_size) { - size_t compressed; + ssize_t compressed; size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; struct zstd_data *zstd_data = &session->zstd_data; @@ -1539,6 +1549,8 @@ static size_t zstd_compress(struct perf_session *session, struct mmap *map, compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, max_record_size, process_comp_header); + if (compressed < 0) + return compressed; if (map && map->file) { thread->bytes_transferred += src_size; @@ -1912,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, static void record__read_lost_samples(struct record *rec) { struct perf_session *session = rec->session; - struct perf_record_lost_samples *lost; + struct perf_record_lost_samples *lost = NULL; struct evsel *evsel; /* there was an error during record__open */ if (session->evlist == NULL) return; - lost = zalloc(PERF_SAMPLE_MAX_SIZE); - if (lost == NULL) { - pr_debug("Memory allocation failed\n"); - return; - } - - lost->header.type = PERF_RECORD_LOST_SAMPLES; - evlist__for_each_entry(session->evlist, evsel) { struct xyarray *xy = evsel->core.sample_id; u64 lost_count; @@ -1949,6 +1953,15 @@ static void record__read_lost_samples(struct record *rec) } if (count.lost) { + if (!lost) { + lost = zalloc(sizeof(*lost) + + session->machines.host.id_hdr_size); + if (!lost) { + pr_debug("Memory allocation failed\n"); + return; + } + lost->header.type = PERF_RECORD_LOST_SAMPLES; + } __record__save_lost_samples(rec, evsel, lost, x, y, count.lost, 0); } @@ -1956,9 +1969,19 @@ static void record__read_lost_samples(struct record *rec) } lost_count = perf_bpf_filter__lost_count(evsel); - if (lost_count) + if (lost_count) { + if (!lost) { + lost = zalloc(sizeof(*lost) + + session->machines.host.id_hdr_size); + if (!lost) { + pr_debug("Memory allocation failed\n"); + return; + } + lost->header.type = PERF_RECORD_LOST_SAMPLES; + } __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count, PERF_RECORD_MISC_LOST_SAMPLES_BPF); + } } out: free(lost); @@ -2216,32 +2239,6 @@ static void hit_auxtrace_snapshot_trigger(struct record *rec) } } -static void record__uniquify_name(struct record *rec) -{ - struct evsel *pos; - struct evlist *evlist = rec->evlist; - char *new_name; - int ret; - - if (perf_pmus__num_core_pmus() == 1) - return; - - evlist__for_each_entry(evlist, pos) { - if (!evsel__is_hybrid(pos)) - continue; - - if (strchr(pos->name, '/')) - continue; - - ret = asprintf(&new_name, "%s/%s/", - pos->pmu_name, pos->name); - if (ret) { - free(pos->name); - pos->name = new_name; - } - } -} - static int record__terminate_thread(struct record_thread *thread_data) { int err; @@ -2475,7 +2472,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (data->is_pipe && rec->evlist->core.nr_entries == 1) rec->opts.sample_id = true; - record__uniquify_name(rec); + evlist__uniquify_name(rec->evlist); /* Debug message used by test scripts */ pr_debug3("perf record opening and mmapping events\n"); @@ -3580,9 +3577,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp if (cpu_map__is_dummy(cpus)) return 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - if (cpu.cpu == -1) - continue; + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { /* Return ENODEV is input cpu is greater than max cpu */ if ((unsigned long)cpu.cpu > mask->nbits) return -ENODEV; @@ -3989,6 +3984,8 @@ int cmd_record(int argc, const char **argv) # undef set_nobuild #endif + /* Disable eager loading of kernel symbols that adds overhead to perf record. */ + symbol_conf.lazy_load_kernel_maps = true; rec->opts.affinity = PERF_AFFINITY_SYS; rec->evlist = evlist__new(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9cb1da2dc0c0..f2ed2b7e80a3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -96,9 +96,9 @@ struct report { bool stitch_lbr; bool disable_order; bool skip_empty; + bool data_type; int max_stack; struct perf_read_values show_threads_values; - struct annotation_options annotation_opts; const char *pretty_printing_style; const char *cpu_list; const char *symbol_filter_str; @@ -171,7 +171,7 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct mem_info *mi; struct branch_info *bi; - if (!ui__has_annotation() && !rep->symbol_ipc) + if (!ui__has_annotation() && !rep->symbol_ipc && !rep->data_type) return 0; if (sort__mode == SORT_MODE__BRANCH) { @@ -541,8 +541,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report * evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, rep->min_percent, pos, - &rep->session->header.env, - &rep->annotation_opts); + &rep->session->header.env); if (ret != 0) return ret; } @@ -574,8 +573,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos, - NULL, NULL); + rep->min_percent, pos, NULL); continue; } @@ -670,7 +668,7 @@ static int report__browse_hists(struct report *rep) } ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env, true, &rep->annotation_opts); + &session->header.env, true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -745,7 +743,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg) if (rep->symbol_ipc && sym && !sym->annotate2) { struct evsel *evsel = hists_to_evsel(he->hists); - symbol__annotate2(&he->ms, evsel, &rep->annotation_opts, NULL); + symbol__annotate2(&he->ms, evsel, NULL); } return 0; @@ -859,27 +857,47 @@ static struct task *tasks_list(struct task *task, struct machine *machine) return tasks_list(parent_task, machine); } -static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +struct maps__fprintf_task_args { + int indent; + FILE *fp; + size_t printed; +}; + +static int maps__fprintf_task_cb(struct map *map, void *data) { - size_t printed = 0; - struct map_rb_node *rb_node; + struct maps__fprintf_task_args *args = data; + const struct dso *dso = map__dso(map); + u32 prot = map__prot(map); + int ret; - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - const struct dso *dso = map__dso(map); - u32 prot = map__prot(map); + ret = fprintf(args->fp, + "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + args->indent, "", map__start(map), map__end(map), + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 'x' : '-', + map__flags(map) ? 's' : 'p', + map__pgoff(map), + dso->id.ino, dso->name); - printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", - indent, "", map__start(map), map__end(map), - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 'w' : '-', - prot & PROT_EXEC ? 'x' : '-', - map__flags(map) ? 's' : 'p', - map__pgoff(map), - dso->id.ino, dso->name); - } + if (ret < 0) + return ret; + + args->printed += ret; + return 0; +} + +static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +{ + struct maps__fprintf_task_args args = { + .indent = indent, + .fp = fp, + .printed = 0, + }; - return printed; + maps__for_each_map(maps, maps__fprintf_task_cb, &args); + + return args.printed; } static void task__print_level(struct task *task, FILE *fp, int level) @@ -1341,15 +1359,15 @@ int cmd_report(int argc, const char **argv) "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, "Display extended information about perf.data file"), - OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), @@ -1401,7 +1419,7 @@ int cmd_report(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), - OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"), @@ -1426,7 +1444,14 @@ int cmd_report(int argc, const char **argv) if (ret < 0) goto exit; - annotation_options__init(&report.annotation_opts); + /* + * tasks_mode require access to exited threads to list those that are in + * the data file. Off-cpu events are synthesized after other events and + * reference exited threads. + */ + symbol_conf.keep_exited_threads = true; + + annotation_options__init(); ret = perf_config(report__config, &report); if (ret) @@ -1445,13 +1470,13 @@ int cmd_report(int argc, const char **argv) } if (disassembler_style) { - report.annotation_opts.disassembler_style = strdup(disassembler_style); - if (!report.annotation_opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - report.annotation_opts.objdump_path = strdup(objdump_path); - if (!report.annotation_opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -1460,7 +1485,7 @@ int cmd_report(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&report.annotation_opts) < 0) { + if (annotate_check_args() < 0) { ret = -EINVAL; goto exit; } @@ -1615,6 +1640,16 @@ repeat: sort_order = NULL; } + if (sort_order && strstr(sort_order, "type")) { + report.data_type = true; + annotate_opts.annotate_src = false; + +#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT + pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); + goto error; +#endif + } + if (strcmp(input_name, "-") != 0) setup_browser(true); else @@ -1673,7 +1708,7 @@ repeat: * so don't allocate extra space that won't be used in the stdio * implementation. */ - if (ui__has_annotation() || report.symbol_ipc || + if (ui__has_annotation() || report.symbol_ipc || report.data_type || report.total_cycles_mode) { ret = symbol__annotation_init(); if (ret < 0) @@ -1692,7 +1727,7 @@ repeat: */ symbol_conf.priv_size += sizeof(u32); } - annotation_config__init(&report.annotation_opts); + annotation_config__init(); } if (symbol__init(&session->header.env) < 0) @@ -1746,7 +1781,7 @@ error: zstd_fini(&(session->zstd_data)); perf_session__delete(session); exit: - annotation_options__exit(&report.annotation_opts); + annotation_options__exit(); free(sort_order_help); free(field_order_help); return ret; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a3af805a1d57..5fe9abc6a524 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -653,7 +653,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if ((evsel__leader(counter) != counter) || !(counter->core.leader->nr_members > 1)) return COUNTER_SKIP; - } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { + } else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; @@ -1204,8 +1204,9 @@ static struct option stat_options[] = { OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, - "disable CPU count aggregation", AGGR_NONE), - OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), + "disable aggregation across CPUs or PMUs", AGGR_NONE), + OPT_SET_UINT(0, "no-merge", &stat_config.aggr_mode, + "disable aggregation the same as -A or -no-aggr", AGGR_NONE), OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge, "Merge identical named hybrid events"), OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", @@ -1255,7 +1256,7 @@ static struct option stat_options[] = { OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, "don't try to share events between metrics in a group"), OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold, - "don't try to share events between metrics in a group "), + "disable adding events for the metric threshold calculation"), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure top-down statistics"), OPT_UINTEGER(0, "td-level", &stat_config.topdown_level, @@ -1316,7 +1317,7 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map) * be the first online CPU in the cache domain else use the * first online CPU of the cache domain as the ID. */ - if (perf_cpu_map__empty(cpu_map)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map)) id = cpu.cpu; else id = perf_cpu_map__cpu(cpu_map, 0).cpu; @@ -1622,7 +1623,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - if (!perf_cpu_map__empty(evsel_list->core.user_requested_cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu; else nr = 0; @@ -2289,7 +2290,7 @@ int process_stat_config_event(struct perf_session *session, perf_event__read_stat_config(&stat_config, &event->stat_config); - if (perf_cpu_map__empty(st->cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); } else if (st->aggr_mode != AGGR_UNSET) { @@ -2695,15 +2696,19 @@ int cmd_stat(int argc, const char **argv) */ if (metrics) { const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + int ret = metricgroup__parse_groups(evsel_list, pmu, metrics, + stat_config.metric_no_group, + stat_config.metric_no_merge, + stat_config.metric_no_threshold, + stat_config.user_requested_cpu_list, + stat_config.system_wide, + &stat_config.metric_events); - metricgroup__parse_groups(evsel_list, pmu, metrics, - stat_config.metric_no_group, - stat_config.metric_no_merge, - stat_config.metric_no_threshold, - stat_config.user_requested_cpu_list, - stat_config.system_wide, - &stat_config.metric_events); zfree(&metrics); + if (ret) { + status = ret; + goto out; + } } if (add_default_attributes()) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ea8c7eca5eee..baf1ab083436 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -147,7 +147,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL); + err = symbol__annotate(&he->ms, evsel, NULL); if (err == 0) { top->sym_filter_entry = he; } else { @@ -261,9 +261,9 @@ static void perf_top__show_details(struct perf_top *top) goto out_unlock; printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); - printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); + printf(" Events Pcnt (>=%d%%)\n", annotate_opts.min_pcnt); - more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts); + more = symbol__annotate_printf(&he->ms, top->sym_evsel); if (top->evlist->enabled) { if (top->zero) @@ -450,7 +450,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt); + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", annotate_opts.min_pcnt); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -553,7 +553,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&top->count_filter, "Enter display event count filter"); break; case 'F': - prompt_percent(&top->annotation_opts.min_pcnt, + prompt_percent(&annotate_opts.min_pcnt, "Enter details display event filter (percent)"); break; case 'K': @@ -646,8 +646,7 @@ repeat: } ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env, !top->record_opts.overwrite, - &top->annotation_opts); + &top->session->header.env, !top->record_opts.overwrite); if (ret == K_RELOAD) { top->zero = true; goto repeat; @@ -1027,8 +1026,8 @@ static int perf_top__start_counters(struct perf_top *top) evlist__for_each_entry(evlist, counter) { try_again: - if (evsel__open(counter, top->evlist->core.user_requested_cpus, - top->evlist->core.threads) < 0) { + if (evsel__open(counter, counter->core.cpus, + counter->core.threads) < 0) { /* * Specially handle overwrite fall back. @@ -1044,7 +1043,7 @@ try_again: perf_top_overwrite_fallback(top, counter)) goto try_again; - if (evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (evsel__fallback(counter, &opts->target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -1241,9 +1240,9 @@ static int __cmd_top(struct perf_top *top) pthread_t thread, thread_process; int ret; - if (!top->annotation_opts.objdump_path) { + if (!annotate_opts.objdump_path) { ret = perf_env__lookup_objdump(&top->session->header.env, - &top->annotation_opts.objdump_path); + &annotate_opts.objdump_path); if (ret) return ret; } @@ -1299,6 +1298,7 @@ static int __cmd_top(struct perf_top *top) } } + evlist__uniquify_name(top->evlist); ret = perf_top__start_counters(top); if (ret) return ret; @@ -1536,9 +1536,9 @@ int cmd_top(int argc, const char **argv) "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), - OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), @@ -1549,9 +1549,9 @@ int cmd_top(int argc, const char **argv) "addr2line binary to use for line numbers"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", @@ -1609,10 +1609,10 @@ int cmd_top(int argc, const char **argv) if (status < 0) return status; - annotation_options__init(&top.annotation_opts); + annotation_options__init(); - top.annotation_opts.min_pcnt = 5; - top.annotation_opts.context = 4; + annotate_opts.min_pcnt = 5; + annotate_opts.context = 4; top.evlist = evlist__new(); if (top.evlist == NULL) @@ -1642,13 +1642,13 @@ int cmd_top(int argc, const char **argv) usage_with_options(top_usage, options); if (disassembler_style) { - top.annotation_opts.disassembler_style = strdup(disassembler_style); - if (!top.annotation_opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - top.annotation_opts.objdump_path = strdup(objdump_path); - if (!top.annotation_opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -1661,7 +1661,7 @@ int cmd_top(int argc, const char **argv) if (status) goto out_delete_evlist; - if (annotate_check_args(&top.annotation_opts) < 0) + if (annotate_check_args() < 0) goto out_delete_evlist; if (!top.evlist->core.nr_entries) { @@ -1787,7 +1787,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) goto out_delete_evlist; - annotation_config__init(&top.annotation_opts); + annotation_config__init(); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); status = symbol__init(NULL); @@ -1840,7 +1840,7 @@ int cmd_top(int argc, const char **argv) out_delete_evlist: evlist__delete(top.evlist); perf_session__delete(top.session); - annotation_options__exit(&top.annotation_opts); + annotation_options__exit(); return status; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e541d0e2777a..109b8e64fe69 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2470,9 +2470,8 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam static const char *errno_to_name(struct evsel *evsel, int err) { struct perf_env *env = evsel__env(evsel); - const char *arch_name = perf_env__arch(env); - return arch_syscalls__strerrno(arch_name, err); + return perf_env__arch_strerrno(env, err); } static int trace__sys_exit(struct trace *trace, struct evsel *evsel, @@ -4264,12 +4263,11 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); if (trace->errno_summary && stats->nr_failures) { - const char *arch_name = perf_env__arch(trace->host->env); int e; for (e = 0; e < stats->max_errno; ++e) { if (stats->errnos[e] != 0) - fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]); + fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]); } } } diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 133f0eddbcc4..f94795794b36 100644..100755 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -4,8 +4,73 @@ # Arnaldo Carvalho de Melo <acme@redhat.com> PERF_DATA=perf.data -if [ $# -ne 0 ] ; then - PERF_DATA=$1 +PERF_SYMBOLS=perf.symbols +PERF_ALL=perf.all +ALL=0 +UNPACK=0 + +while [ $# -gt 0 ] ; do + if [ $1 == "--all" ]; then + ALL=1 + shift + elif [ $1 == "--unpack" ]; then + UNPACK=1 + shift + else + PERF_DATA=$1 + UNPACK_TAR=$1 + shift + fi +done + +if [ $UNPACK -eq 1 ]; then + if [ ! -z "$UNPACK_TAR" ]; then # tar given as an argument + if [ ! -e "$UNPACK_TAR" ]; then + echo "Provided file $UNPACK_TAR does not exist" + exit 1 + fi + TARGET="$UNPACK_TAR" + else # search for perf tar in the current directory + TARGET=`find . -regex "\./perf.*\.tar\.bz2"` + TARGET_NUM=`echo -n "$TARGET" | grep -c '^'` + + if [ -z "$TARGET" -o $TARGET_NUM -gt 1 ]; then + echo -e "Error: $TARGET_NUM files found for unpacking:\n$TARGET" + echo "Provide the requested file as an argument" + exit 1 + else + echo "Found target file for unpacking: $TARGET" + fi + fi + + if [[ "$TARGET" =~ (\./)?$PERF_ALL.*.tar.bz2 ]]; then # perf tar generated by --all option + TAR_CONTENTS=`tar tvf "$TARGET" | tr -s " " | cut -d " " -f 6` + VALID_TAR=`echo "$TAR_CONTENTS" | grep "$PERF_SYMBOLS.tar.bz2" | wc -l` # check if it contains a sub-tar perf.symbols + if [ $VALID_TAR -ne 1 ]; then + echo "Error: $TARGET file is not valid (contains zero or multiple sub-tar files with debug symbols)" + exit 1 + fi + + INTERSECT=`comm -12 <(ls) <(echo "$TAR_CONTENTS") | tr "\n" " "` # check for overwriting + if [ ! -z "$INTERSECT" ]; then # prompt if file(s) already exist in the current directory + echo "File(s) ${INTERSECT::-1} already exist in the current directory." + while true; do + read -p 'Do you wish to overwrite them? ' yn + case $yn in + [Yy]* ) break;; + [Nn]* ) exit 1;; + * ) echo "Please answer yes or no.";; + esac + done + fi + + # unzip the perf.data file in the current working directory and debug symbols in ~/.debug directory + tar xvf $TARGET && tar xvf $PERF_SYMBOLS.tar.bz2 -C ~/.debug + + else # perf tar generated by perf archive (contains only debug symbols) + tar xvf $TARGET -C ~/.debug + fi + exit 0 fi # @@ -39,9 +104,18 @@ while read build_id ; do echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST done -tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST -rm $MANIFEST $BUILDIDS || true +if [ $ALL -eq 1 ]; then # pack perf.data file together with tar containing debug symbols + HOSTNAME=$(hostname) + DATE=$(date '+%Y%m%d-%H%M%S') + tar cjf $PERF_SYMBOLS.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST + tar cjf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 $PERF_DATA $PERF_SYMBOLS.tar.bz2 + rm $PERF_SYMBOLS.tar.bz2 $MANIFEST $BUILDIDS || true +else # pack only the debug symbols + tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST + rm $MANIFEST $BUILDIDS || true +fi + echo -e "Now please run:\n" -echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" -echo "wherever you need to run 'perf report' on." +echo -e "$ perf archive --unpack\n" +echo "or unpack the tar manually wherever you need to run 'perf report' on." exit 0 diff --git a/tools/perf/perf.c b/tools/perf/perf.c index d3fc8090413c..921bee0a6437 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -39,6 +39,7 @@ #include <linux/zalloc.h> static int use_pager = -1; +static FILE *debug_fp = NULL; struct cmd_struct { const char *cmd; @@ -162,6 +163,19 @@ static void commit_pager_choice(void) } } +static int set_debug_file(const char *path) +{ + debug_fp = fopen(path, "w"); + if (!debug_fp) { + fprintf(stderr, "Open debug file '%s' failed: %s\n", + path, strerror(errno)); + return -1; + } + + debug_set_file(debug_fp); + return 0; +} + struct option options[] = { OPT_ARGUMENT("help", "help"), OPT_ARGUMENT("version", "version"), @@ -174,6 +188,7 @@ struct option options[] = { OPT_ARGUMENT("list-cmds", "list-cmds"), OPT_ARGUMENT("list-opts", "list-opts"), OPT_ARGUMENT("debug", "debug"), + OPT_ARGUMENT("debug-file", "debug-file"), OPT_END() }; @@ -287,6 +302,18 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argv)++; (*argc)--; + } else if (!strcmp(cmd, "--debug-file")) { + if (*argc < 2) { + fprintf(stderr, "No path given for --debug-file.\n"); + usage(perf_usage_string); + } + + if (set_debug_file((*argv)[1])) + usage(perf_usage_string); + + (*argv)++; + (*argc)--; + } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -547,5 +574,8 @@ int main(int argc, const char **argv) fprintf(stderr, "Failed to run command '%s': %s\n", cmd, str_error_r(errno, sbuf, sizeof(sbuf))); out: + if (debug_fp) + fclose(debug_fp); + return 1; } diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json index 88b23b85e33c..879ff21e0b17 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json @@ -110,7 +110,7 @@ { "PublicDescription": "Flushes due to memory hazards", "EventCode": "0x121", - "EventName": "BPU_FLUSH_MEM_FAULT", + "EventName": "GPC_FLUSH_MEM_FAULT", "BriefDescription": "Flushes due to memory hazards" }, { diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json new file mode 100644 index 000000000000..a632755fc086 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json @@ -0,0 +1,125 @@ +[ + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + }, + { + "PublicDescription": "Instruction architecturally executed, branch not taken", + "EventCode": "0x8107", + "EventName": "BR_SKIP_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, immediate branch taken", + "EventCode": "0x8108", + "EventName": "BR_IMMED_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, immediate branch taken" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch excluding return retired", + "EventCode": "0x810c", + "EventName": "BR_INDNR_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch excluding return retired" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted immediate branch", + "EventCode": "0x8110", + "EventName": "BR_IMMED_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted immediate branch", + "EventCode": "0x8111", + "EventName": "BR_IMMED_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch", + "EventCode": "0x8112", + "EventName": "BR_IND_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch", + "EventCode": "0x8113", + "EventName": "BR_IND_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted procedure return", + "EventCode": "0x8114", + "EventName": "BR_RETURN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted procedure return", + "EventCode": "0x8115", + "EventName": "BR_RETURN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch excluding return", + "EventCode": "0x8116", + "EventName": "BR_INDNR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return", + "EventCode": "0x8117", + "EventName": "BR_INDNR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, taken", + "EventCode": "0x8118", + "EventName": "BR_TAKEN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, taken", + "EventCode": "0x8119", + "EventName": "BR_TAKEN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, not taken", + "EventCode": "0x811a", + "EventName": "BR_SKIP_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, not taken", + "EventCode": "0x811b", + "EventName": "BR_SKIP_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch", + "EventCode": "0x811c", + "EventName": "BR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch", + "EventCode": "0x811d", + "EventName": "BR_IND_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch" + }, + { + "PublicDescription": "Branch Record captured.", + "EventCode": "0x811f", + "EventName": "BRB_FILTRATE", + "BriefDescription": "Branch Record captured." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json new file mode 100644 index 000000000000..2aeb9907831d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json new file mode 100644 index 000000000000..c50d8e930b05 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json @@ -0,0 +1,206 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "PublicDescription": "Level 1 data or unified cache demand access", + "EventCode": "0x8140", + "EventName": "L1D_CACHE_RW", + "BriefDescription": "Level 1 data or unified cache demand access" + }, + { + "PublicDescription": "Level 1 data or unified cache preload or prefetch", + "EventCode": "0x8142", + "EventName": "L1D_CACHE_PRFM", + "BriefDescription": "Level 1 data or unified cache preload or prefetch" + }, + { + "PublicDescription": "Level 1 data or unified cache refill, preload or prefetch", + "EventCode": "0x8146", + "EventName": "L1D_CACHE_REFILL_PRFM", + "BriefDescription": "Level 1 data or unified cache refill, preload or prefetch" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "PublicDescription": "L1D TLB miss", + "EventCode": "0xD600", + "EventName": "L1D_TLB_MISS", + "BriefDescription": "L1D TLB miss" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch requests generated", + "EventCode": "0xd606", + "EventName": "L1_PREFETCH_LD_GEN", + "BriefDescription": "Level 1 prefetcher, load prefetch requests generated" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache", + "EventCode": "0xd607", + "EventName": "L1_PREFETCH_LD_FILL", + "BriefDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch to level 2 generated", + "EventCode": "0xd608", + "EventName": "L1_PREFETCH_L2_REQ", + "BriefDescription": "Level 1 prefetcher, load prefetch to level 2 generated" + }, + { + "PublicDescription": "L1 prefetcher, distance was reset", + "EventCode": "0xd609", + "EventName": "L1_PREFETCH_DIST_RST", + "BriefDescription": "L1 prefetcher, distance was reset" + }, + { + "PublicDescription": "L1 prefetcher, distance was increased", + "EventCode": "0xd60a", + "EventName": "L1_PREFETCH_DIST_INC", + "BriefDescription": "L1 prefetcher, distance was increased" + }, + { + "PublicDescription": "Level 1 prefetcher, table entry is trained", + "EventCode": "0xd60b", + "EventName": "L1_PREFETCH_ENTRY_TRAINED", + "BriefDescription": "Level 1 prefetcher, table entry is trained" + }, + { + "PublicDescription": "L1 data cache refill - Read or Write", + "EventCode": "0xd60e", + "EventName": "L1D_CACHE_REFILL_RW", + "BriefDescription": "L1 data cache refill - Read or Write" + }, + { + "PublicDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills", + "EventCode": "0xD701", + "EventName": "L2C_INST_REFILL", + "BriefDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills" + }, + { + "PublicDescription": "Level 2 cache refill from data-side miss, including DMMU refills", + "EventCode": "0xD702", + "EventName": "L2C_DATA_REFILL", + "BriefDescription": "Level 2 cache refill from data-side miss, including DMMU refills" + }, + { + "PublicDescription": "Level 2 cache prefetcher, load prefetch requests generated", + "EventCode": "0xD703", + "EventName": "L2_PREFETCH_REQ", + "BriefDescription": "Level 2 cache prefetcher, load prefetch requests generated" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json new file mode 100644 index 000000000000..eb5a2208d260 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json @@ -0,0 +1,464 @@ +[ + { + "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache", + "EventCode": "0x10A", + "EventName": "L2_PREFETCH_REFILL", + "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache" + }, + { + "PublicDescription": "Level 2 prefetch requests, late", + "EventCode": "0x10B", + "EventName": "L2_PREFETCH_UPGRADE", + "BriefDescription": "Level 2 prefetch requests, late" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB", + "EventCode": "0x110", + "EventName": "BPU_HIT_BTB", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB", + "EventCode": "0x111", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor", + "EventCode": "0x112", + "EventName": "BPU_HIT_INDIRECT_PREDICTOR", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor", + "EventCode": "0x113", + "EventName": "BPU_HIT_RSB", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor" + }, + { + "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB", + "EventCode": "0x114", + "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB", + "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted", + "EventCode": "0x115", + "EventName": "BPU_BRANCH_NO_HIT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict", + "EventCode": "0x116", + "EventName": "BPU_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict", + "EventCode": "0x117", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict", + "EventCode": "0x118", + "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict", + "EventCode": "0x119", + "EventName": "BPU_HIT_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict", + "EventCode": "0x11a", + "EventName": "BPU_MISS_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", + "EventCode": "0x11b", + "EventName": "BPU_NO_PREDICTION_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" + }, + { + "PublicDescription": "Preditable branch update the BTB region buffer entry", + "EventCode": "0x11c", + "EventName": "BPU_BTB_UPDATE", + "BriefDescription": "Preditable branch update the BTB region buffer entry" + }, + { + "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full", + "EventCode": "0x11d", + "EventName": "BPU_RSB_FULL_STALL", + "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full" + }, + { + "PublicDescription": "Macro-ops speculatively decoded", + "EventCode": "0x11f", + "EventName": "ICF_INST_SPEC_DECODE", + "BriefDescription": "Macro-ops speculatively decoded" + }, + { + "PublicDescription": "Flushes", + "EventCode": "0x120", + "EventName": "GPC_FLUSH", + "BriefDescription": "Flushes" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "GPC_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + }, + { + "PublicDescription": "ETM extout bit 0", + "EventCode": "0x141", + "EventName": "MSC_ETM_EXTOUT0", + "BriefDescription": "ETM extout bit 0" + }, + { + "PublicDescription": "ETM extout bit 1", + "EventCode": "0x142", + "EventName": "MSC_ETM_EXTOUT1", + "BriefDescription": "ETM extout bit 1" + }, + { + "PublicDescription": "ETM extout bit 2", + "EventCode": "0x143", + "EventName": "MSC_ETM_EXTOUT2", + "BriefDescription": "ETM extout bit 2" + }, + { + "PublicDescription": "ETM extout bit 3", + "EventCode": "0x144", + "EventName": "MSC_ETM_EXTOUT3", + "BriefDescription": "ETM extout bit 3" + }, + { + "PublicDescription": "Bus request sn", + "EventCode": "0x156", + "EventName": "L2C_SNOOP", + "BriefDescription": "Bus request sn" + }, + { + "PublicDescription": "L2 TXDAT LCRD blocked", + "EventCode": "0x169", + "EventName": "L2C_DAT_CRD_STALL", + "BriefDescription": "L2 TXDAT LCRD blocked" + }, + { + "PublicDescription": "L2 TXRSP LCRD blocked", + "EventCode": "0x16a", + "EventName": "L2C_RSP_CRD_STALL", + "BriefDescription": "L2 TXRSP LCRD blocked" + }, + { + "PublicDescription": "L2 TXREQ LCRD blocked", + "EventCode": "0x16b", + "EventName": "L2C_REQ_CRD_STALL", + "BriefDescription": "L2 TXREQ LCRD blocked" + }, + { + "PublicDescription": "Early mispredict", + "EventCode": "0xD100", + "EventName": "ICF_EARLY_MIS_PRED", + "BriefDescription": "Early mispredict" + }, + { + "PublicDescription": "FEQ full cycles", + "EventCode": "0xD101", + "EventName": "ICF_FEQ_FULL", + "BriefDescription": "FEQ full cycles" + }, + { + "PublicDescription": "Instruction FIFO Full", + "EventCode": "0xD102", + "EventName": "ICF_INST_FIFO_FULL", + "BriefDescription": "Instruction FIFO Full" + }, + { + "PublicDescription": "L1I TLB miss", + "EventCode": "0xD103", + "EventName": "L1I_TLB_MISS", + "BriefDescription": "L1I TLB miss" + }, + { + "PublicDescription": "ICF sent 0 instructions to IDR this cycle", + "EventCode": "0xD104", + "EventName": "ICF_STALL", + "BriefDescription": "ICF sent 0 instructions to IDR this cycle" + }, + { + "PublicDescription": "PC FIFO Full", + "EventCode": "0xD105", + "EventName": "ICF_PC_FIFO_FULL", + "BriefDescription": "PC FIFO Full" + }, + { + "PublicDescription": "Stall due to BOB ID", + "EventCode": "0xD200", + "EventName": "IDR_STALL_BOB_ID", + "BriefDescription": "Stall due to BOB ID" + }, + { + "PublicDescription": "Dispatch stall due to LOB entries", + "EventCode": "0xD201", + "EventName": "IDR_STALL_LOB_ID", + "BriefDescription": "Dispatch stall due to LOB entries" + }, + { + "PublicDescription": "Dispatch stall due to SOB entries", + "EventCode": "0xD202", + "EventName": "IDR_STALL_SOB_ID", + "BriefDescription": "Dispatch stall due to SOB entries" + }, + { + "PublicDescription": "Dispatch stall due to IXU scheduler entries", + "EventCode": "0xD203", + "EventName": "IDR_STALL_IXU_SCHED", + "BriefDescription": "Dispatch stall due to IXU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to FSU scheduler entries", + "EventCode": "0xD204", + "EventName": "IDR_STALL_FSU_SCHED", + "BriefDescription": "Dispatch stall due to FSU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to ROB entries", + "EventCode": "0xD205", + "EventName": "IDR_STALL_ROB_ID", + "BriefDescription": "Dispatch stall due to ROB entries" + }, + { + "PublicDescription": "Dispatch stall due to flush", + "EventCode": "0xD206", + "EventName": "IDR_STALL_FLUSH", + "BriefDescription": "Dispatch stall due to flush" + }, + { + "PublicDescription": "Dispatch stall due to WFI", + "EventCode": "0xD207", + "EventName": "IDR_STALL_WFI", + "BriefDescription": "Dispatch stall due to WFI" + }, + { + "PublicDescription": "Number of SWOB drains triggered by timeout", + "EventCode": "0xD208", + "EventName": "IDR_STALL_SWOB_TIMEOUT", + "BriefDescription": "Number of SWOB drains triggered by timeout" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain", + "EventCode": "0xD209", + "EventName": "IDR_STALL_SWOB_RAW", + "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full", + "EventCode": "0xD20A", + "EventName": "IDR_STALL_SWOB_FULL", + "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full" + }, + { + "PublicDescription": "Dispatch stall due to L1 instruction cache miss", + "EventCode": "0xD20B", + "EventName": "STALL_FRONTEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 instruction cache miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 data cache miss", + "EventCode": "0xD20D", + "EventName": "STALL_BACKEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 data cache miss" + }, + { + "PublicDescription": "Dispatch stall due to lack of any core resource", + "EventCode": "0xD20F", + "EventName": "STALL_BACKEND_RESOURCE", + "BriefDescription": "Dispatch stall due to lack of any core resource" + }, + { + "PublicDescription": "Instructions issued by the scheduler", + "EventCode": "0xD300", + "EventName": "IXU_NUM_UOPS_ISSUED", + "BriefDescription": "Instructions issued by the scheduler" + }, + { + "PublicDescription": "Any uop issued was canceled for any reason", + "EventCode": "0xD301", + "EventName": "IXU_ISSUE_CANCEL", + "BriefDescription": "Any uop issued was canceled for any reason" + }, + { + "PublicDescription": "A load wakeup to the scheduler has been canceled", + "EventCode": "0xD302", + "EventName": "IXU_LOAD_CANCEL", + "BriefDescription": "A load wakeup to the scheduler has been canceled" + }, + { + "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict", + "EventCode": "0xD303", + "EventName": "IXU_SLOW_CANCEL", + "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA", + "EventCode": "0xD304", + "EventName": "IXU_IXA_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 0", + "EventCode": "0xD305", + "EventName": "IXU_IXA_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 1", + "EventCode": "0xD306", + "EventName": "IXU_IXA_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB", + "EventCode": "0xD307", + "EventName": "IXU_IXB_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 0", + "EventCode": "0xD308", + "EventName": "IXU_IXB_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 1", + "EventCode": "0xD309", + "EventName": "IXU_IXB_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC", + "EventCode": "0xD30A", + "EventName": "IXU_IXC_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 0", + "EventCode": "0xD30B", + "EventName": "IXU_IXC_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 1", + "EventCode": "0xD30C", + "EventName": "IXU_IXC_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD", + "EventCode": "0xD30D", + "EventName": "IXU_IXD_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 0", + "EventCode": "0xD30E", + "EventName": "IXU_IXD_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 1", + "EventCode": "0xD30F", + "EventName": "IXU_IXD_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 1" + }, + { + "PublicDescription": "Uops issued by the FSU scheduler", + "EventCode": "0xD400", + "EventName": "FSU_ISSUED", + "BriefDescription": "Uops issued by the FSU scheduler" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSX", + "EventCode": "0xD401", + "EventName": "FSU_FSX_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSX" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSY", + "EventCode": "0xD402", + "EventName": "FSU_FSY_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSY" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSZ", + "EventCode": "0xD403", + "EventName": "FSU_FSZ_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSZ" + }, + { + "PublicDescription": "Uops canceled (load cancels)", + "EventCode": "0xD404", + "EventName": "FSU_CANCEL", + "BriefDescription": "Uops canceled (load cancels)" + }, + { + "PublicDescription": "Count scheduler stalls due to divide/sqrt", + "EventCode": "0xD405", + "EventName": "FSU_DIV_SQRT_STALL", + "BriefDescription": "Count scheduler stalls due to divide/sqrt" + }, + { + "PublicDescription": "Number of SWOB drains", + "EventCode": "0xD500", + "EventName": "GPC_SWOB_DRAIN", + "BriefDescription": "Number of SWOB drains" + }, + { + "PublicDescription": "GPC detected a Breakpoint instruction match", + "EventCode": "0xD501", + "EventName": "BREAKPOINT_MATCH", + "BriefDescription": "GPC detected a Breakpoint instruction match" + }, + { + "PublicDescription": "Core progress monitor triggered", + "EventCode": "0xd502", + "EventName": "GPC_CPM_TRIGGER", + "BriefDescription": "Core progress monitor triggered" + }, + { + "PublicDescription": "Fill buffer full", + "EventCode": "0xD601", + "EventName": "OFB_FULL", + "BriefDescription": "Fill buffer full" + }, + { + "PublicDescription": "Load satisified from store forwarded data", + "EventCode": "0xD605", + "EventName": "LD_FROM_ST_FWD", + "BriefDescription": "Load satisified from store forwarded data" + }, + { + "PublicDescription": "Store retirement pipe stall", + "EventCode": "0xD60C", + "EventName": "LSU_ST_RETIRE_STALL", + "BriefDescription": "Store retirement pipe stall" + }, + { + "PublicDescription": "LSU detected a Watchpoint data match", + "EventCode": "0xD60D", + "EventName": "WATCHPOINT_MATCH", + "BriefDescription": "LSU detected a Watchpoint data match" + }, + { + "PublicDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature", + "EventCode": "0xda00", + "EventName": "MSC_ETM_COMMIT_STALL", + "BriefDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json new file mode 100644 index 000000000000..bd59ba7b74e4 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + }, + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "EXC_SMC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json new file mode 100644 index 000000000000..a6a20f541e33 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "ST_RETIRED" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_RETIRED" + }, + { + "ArchStdEvent": "BR_RETURN_RETIRED" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "PC_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Scalar", + "EventCode": "0xd210", + "EventName": "ASE_SCALAR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Scalar" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Vector", + "EventCode": "0xd211", + "EventName": "ASE_VECTOR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Vector" + }, + { + "PublicDescription": "Barrier speculatively executed, CSDB", + "EventCode": "0x7f", + "EventName": "CSDB_SPEC", + "BriefDescription": "Barrier speculatively executed, CSDB" + }, + { + "PublicDescription": "Prefetch sent to L2.", + "EventCode": "0xd106", + "EventName": "ICF_PREFETCH_DISPATCH", + "BriefDescription": "Prefetch sent to L2." + }, + { + "PublicDescription": "Prefetch response received but was dropped since we don't support inflight upgrades.", + "EventCode": "0xd107", + "EventName": "ICF_PREFETCH_DROPPED_NO_UPGRADE", + "BriefDescription": "Prefetch response received but was dropped since we don't support inflight upgrades." + }, + { + "PublicDescription": "Prefetch request missed TLB.", + "EventCode": "0xd108", + "EventName": "ICF_PREFETCH_DROPPED_TLB_MISS", + "BriefDescription": "Prefetch request missed TLB." + }, + { + "PublicDescription": "Prefetch request dropped since duplicate was found in TLB.", + "EventCode": "0xd109", + "EventName": "ICF_PREFETCH_DROPPED_DUPLICATE", + "BriefDescription": "Prefetch request dropped since duplicate was found in TLB." + }, + { + "PublicDescription": "Prefetch request dropped since it was found in cache.", + "EventCode": "0xd10a", + "EventName": "ICF_PREFETCH_DROPPED_CACHE_HIT", + "BriefDescription": "Prefetch request dropped since it was found in cache." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json new file mode 100644 index 000000000000..7ecffb989ae0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json new file mode 100644 index 000000000000..a211d94aacde --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "LD_RETIRED" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "BPU_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json new file mode 100644 index 000000000000..c5d1d22bd034 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json @@ -0,0 +1,442 @@ +[ + { + "MetricName": "branch_miss_pred_rate", + "MetricExpr": "BR_MIS_PRED / BR_PRED", + "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch", + "MetricGroup": "branch", + "ScaleUnit": "100%" + }, + { + "MetricName": "bus_utilization", + "MetricExpr": "BUS_ACCESS / (BUS_CYCLES * 1)", + "BriefDescription": "Core-to-uncore bus utilization", + "MetricGroup": "Bus", + "ScaleUnit": "100percent of bus cycles" + }, + { + "MetricName": "l1d_cache_miss_ratio", + "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.", + "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_miss_ratio", + "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.", + "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "Miss_Ratio;l1d_cache_read_miss", + "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD", + "BriefDescription": "L1D cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l2_cache_miss_ratio", + "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE", + "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.", + "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_read_miss_rate", + "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE", + "BriefDescription": "L1I cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l2d_cache_read_miss_rate", + "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD", + "BriefDescription": "L2 cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l1d_cache_miss_mpki", + "MetricExpr": "(L1D_CACHE_LMISS_RD * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (data)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "l1i_cache_miss_mpki", + "MetricExpr": "(L1I_CACHE_LMISS * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (instruction)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "simd_percentage", + "MetricExpr": "ASE_SPEC / INST_SPEC", + "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "crypto_percentage", + "MetricExpr": "CRYPTO_SPEC / INST_SPEC", + "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "gflops", + "MetricExpr": "VFP_SPEC / (duration_time * 1e9)", + "BriefDescription": "Giga-floating point operations per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "integer_dp_percentage", + "MetricExpr": "DP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "ipc", + "MetricExpr": "INST_RETIRED / CPU_CYCLES", + "BriefDescription": "This metric measures the number of instructions retired per cycle.", + "MetricGroup": "General", + "ScaleUnit": "1per cycle" + }, + { + "MetricName": "load_percentage", + "MetricExpr": "LD_SPEC / INST_SPEC", + "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "load_store_spec_rate", + "MetricExpr": "LDST_SPEC / INST_SPEC", + "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_mips", + "MetricExpr": "INST_RETIRED / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "spec_utilization_mips", + "MetricExpr": "INST_SPEC / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "PEutilization" + }, + { + "MetricName": "pc_write_spec_rate", + "MetricExpr": "PC_WRITE_SPEC / INST_SPEC", + "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "store_percentage", + "MetricExpr": "ST_SPEC / INST_SPEC", + "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "scalar_fp_percentage", + "MetricExpr": "VFP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_rate", + "MetricExpr": "OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted", + "MetricExpr": "1 - (OP_RETIRED / (CPU_CYCLES * #slots))", + "BriefDescription": "Of all the micro-operations issued, what proportion are lost", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted_rate", + "MetricExpr": "1 - OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "stall_backend_cache_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_resource_rate", + "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_tlb_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_cache_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_tlb_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "dtlb_walk_ratio", + "MetricExpr": "DTLB_WALK / L1D_TLB", + "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.", + "MetricGroup": "Miss_Ratio;DTLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "MetricName": "itlb_walk_ratio", + "MetricExpr": "ITLB_WALK / L1I_TLB", + "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.", + "MetricGroup": "Miss_Ratio;ITLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "ArchStdEvent": "backend_bound" + }, + { + "ArchStdEvent": "frontend_bound", + "MetricExpr": "100 - (retired_fraction + slots_lost_misspeculation_fraction + backend_bound)" + }, + { + "MetricName": "slots_lost_misspeculation_fraction", + "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "retired_fraction", + "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "backend_core", + "MetricExpr": "(backend_bound / 100) - backend_memory", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "backend_memory", + "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "branch_mispredict", + "MetricExpr": "(BR_MIS_PRED_RETIRED / GPC_FLUSH) * slots_lost_misspeculation_fraction", + "BriefDescription": "Fraction of slots lost due to branch misprediciton", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_bandwidth", + "MetricExpr": "frontend_bound - frontend_latency", + "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_latency", + "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - ((frontend_bound / 100) * CPU_CYCLES * #slots)) / #slots)) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "other_miss_pred", + "MetricExpr": "slots_lost_misspeculation_fraction - branch_mispredict", + "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "pipe_utilization", + "MetricExpr": "100 * ((IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6))", + "BriefDescription": "Fraction of execute slots utilized", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "d_cache_l2_miss_rate", + "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_cache_miss_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_tlb_miss_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "fsu_pipe_utilization", + "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)", + "BriefDescription": "Fraction of FSU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_cache_miss_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_tlb_miss_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "ixu_pipe_utilization", + "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of IXU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_recovery_rate", + "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_fsu_sched_rate", + "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_ixu_sched_rate", + "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_lob_id_rate", + "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_rob_id_rate", + "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_sob_id_rate", + "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "l1d_cache_access_demand", + "MetricExpr": "L1D_CACHE_RW / L1D_CACHE", + "BriefDescription": "L1D cache access - demand", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_access_prefetces", + "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache access - prefetch", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses", + "MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE", + "BriefDescription": "L1D cache demand misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_read", + "MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - read", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_write", + "MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - write", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_prefetch_misses", + "MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache prefetch misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_scalar_mix", + "MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_vector_mix", + "MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json new file mode 100644 index 000000000000..66d83b680651 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json @@ -0,0 +1,170 @@ +[ + { + "PublicDescription": "Level 2 data translation buffer allocation", + "EventCode": "0xD800", + "EventName": "MMU_D_OTB_ALLOC", + "BriefDescription": "Level 2 data translation buffer allocation" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd801", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd802", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd803", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd804", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescrition": "Data TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd805", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescrition": "Data TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd806", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescrition": "Data-side S1 page walk cache lookup", + "EventCode": "0xd807", + "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S1 page walk cache lookup" + }, + { + "PublicDescrition": "Data-side S1 page walk cache refill", + "EventCode": "0xd808", + "EventName": "MMU_D_S1_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S1 page walk cache refill" + }, + { + "PublicDescrition": "Data-side S2 page walk cache lookup", + "EventCode": "0xd809", + "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S2 page walk cache lookup" + }, + { + "PublicDescrition": "Data-side S2 page walk cache refill", + "EventCode": "0xd80a", + "EventName": "MMU_D_S2_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S2 page walk cache refill" + }, + { + "PublicDescription": "Data-side S1 table walk fault", + "EventCode": "0xD80B", + "EventName": "MMU_D_S1_WALK_FAULT", + "BriefDescription": "Data-side S1 table walk fault" + }, + { + "PublicDescription": "Data-side S2 table walk fault", + "EventCode": "0xD80C", + "EventName": "MMU_D_S2_WALK_FAULT", + "BriefDescription": "Data-side S2 table walk fault" + }, + { + "PublicDescription": "Data-side table walk steps or descriptor fetches", + "EventCode": "0xD80D", + "EventName": "MMU_D_WALK_STEPS", + "BriefDescription": "Data-side table walk steps or descriptor fetches" + }, + { + "PublicDescription": "Level 2 instruction translation buffer allocation", + "EventCode": "0xD900", + "EventName": "MMU_I_OTB_ALLOC", + "BriefDescription": "Level 2 instruction translation buffer allocation" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd901", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd902", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd903", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd904", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd905", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd906", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescrition": "Instruction-side S1 page walk cache lookup", + "EventCode": "0xd907", + "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S1 page walk cache lookup" + }, + { + "PublicDescrition": "Instruction-side S1 page walk cache refill", + "EventCode": "0xd908", + "EventName": "MMU_I_S1_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S1 page walk cache refill" + }, + { + "PublicDescrition": "Instruction-side S2 page walk cache lookup", + "EventCode": "0xd909", + "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S2 page walk cache lookup" + }, + { + "PublicDescrition": "Instruction-side S2 page walk cache refill", + "EventCode": "0xd90a", + "EventName": "MMU_I_S2_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S2 page walk cache refill" + }, + { + "PublicDescription": "Instruction-side S1 table walk fault", + "EventCode": "0xD90B", + "EventName": "MMU_I_S1_WALK_FAULT", + "BriefDescription": "Instruction-side S1 table walk fault" + }, + { + "PublicDescription": "Instruction-side S2 table walk fault", + "EventCode": "0xD90C", + "EventName": "MMU_I_S2_WALK_FAULT", + "BriefDescription": "Instruction-side S2 table walk fault" + }, + { + "PublicDescription": "Instruction-side table walk steps or descriptor fetches", + "EventCode": "0xD90D", + "EventName": "MMU_I_WALK_STEPS", + "BriefDescription": "Instruction-side table walk steps or descriptor fetches" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json new file mode 100644 index 000000000000..2fb2d1f183fc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + }, + { + "PublicDescription": "Frontend stall cycles, TLB", + "EventCode": "0x815c", + "EventName": "STALL_FRONTEND_TLB", + "BriefDescription": "Frontend stall cycles, TLB" + }, + { + "PublicDescription": "Backend stall cycles, TLB", + "EventCode": "0x8167", + "EventName": "STALL_BACKEND_TLB", + "BriefDescription": "Backend stall cycles, TLB" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json b/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json index 428605c37d10..5ec157c39f0d 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json @@ -107,7 +107,7 @@ "EventName": "hnf_qos_hh_retry", "EventidCode": "0xe", "NodeType": "0x5", - "BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN‑F.", + "BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN-F.", "Unit": "arm_cmn", "Compat": "(434|436|43c|43a).*" }, diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 5b58db5032c1..f4d1ca4d1493 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -42,3 +42,4 @@ 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core 0x00000000c00fac30,v1,ampere/ampereone,core +0x00000000c00fac40,v1,ampere/ampereonex,core diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index f4908af7ad66..599a588dbeb4 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -11,8 +11,7 @@ # # Multiple PVRs could map to a single JSON file. # - -# Power8 entries 0x004[bcd][[:xdigit:]]{4},1,power8,core +0x0066[[:xdigit:]]{4},1,power8,core 0x004e[[:xdigit:]]{4},1,power9,core 0x0080[[:xdigit:]]{4},1,power10,core diff --git a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json index 6b0356f2d301..0eeaaf1a95b8 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json @@ -100,6 +100,11 @@ "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss." }, { + "EventCode": "0x0003C0000000C040", + "EventName": "PM_DATA_FROM_L2MISS_DSRC", + "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss." + }, + { "EventCode": "0x000380000010C040", "EventName": "PM_INST_FROM_L2MISS_ALL", "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss or prefetch reload." @@ -161,10 +166,15 @@ }, { "EventCode": "0x000780000000C040", - "EventName": "PM_INST_FROM_L3MISS", + "EventName": "PM_INST_FROM_L3MISS_DSRC", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss." }, { + "EventCode": "0x0007C0000000C040", + "EventName": "PM_DATA_FROM_L3MISS_DSRC", + "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss." + }, + { "EventCode": "0x000780000010C040", "EventName": "PM_INST_FROM_L3MISS_ALL", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss or prefetch reload." @@ -981,7 +991,7 @@ }, { "EventCode": "0x0003C0000000C142", - "EventName": "PM_MRK_DATA_FROM_L2MISS", + "EventName": "PM_MRK_DATA_FROM_L2MISS_DSRC", "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction." }, { @@ -1046,12 +1056,12 @@ }, { "EventCode": "0x000780000000C142", - "EventName": "PM_MRK_INST_FROM_L3MISS", + "EventName": "PM_MRK_INST_FROM_L3MISS_DSRC", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." }, { "EventCode": "0x0007C0000000C142", - "EventName": "PM_MRK_DATA_FROM_L3MISS", + "EventName": "PM_MRK_DATA_FROM_L3MISS_DSRC", "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." }, { diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv index c61b3d6ef616..cfc449b19810 100644 --- a/tools/perf/pmu-events/arch/riscv/mapfile.csv +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -15,3 +15,5 @@ # #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core +0x5b7-0x0-0x0,v1,thead/c900-legacy,core +0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json new file mode 100644 index 000000000000..fbffcacb2ace --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json @@ -0,0 +1,172 @@ +[ + { + "EventName": "ACCESS_MMU_STLB", + "EventCode": "0x1", + "BriefDescription": "access MMU STLB" + }, + { + "EventName": "MISS_MMU_STLB", + "EventCode": "0x2", + "BriefDescription": "miss MMU STLB" + }, + { + "EventName": "ACCESS_MMU_PTE_C", + "EventCode": "0x3", + "BriefDescription": "access MMU PTE-Cache" + }, + { + "EventName": "MISS_MMU_PTE_C", + "EventCode": "0x4", + "BriefDescription": "miss MMU PTE-Cache" + }, + { + "EventName": "ROB_FLUSH", + "EventCode": "0x5", + "BriefDescription": "ROB flush (all kinds of exceptions)" + }, + { + "EventName": "BTB_PREDICTION_MISS", + "EventCode": "0x6", + "BriefDescription": "BTB prediction miss" + }, + { + "EventName": "ITLB_MISS", + "EventCode": "0x7", + "BriefDescription": "ITLB miss" + }, + { + "EventName": "SYNC_DEL_FETCH_G", + "EventCode": "0x8", + "BriefDescription": "SYNC delivery a fetch-group" + }, + { + "EventName": "ICACHE_MISS", + "EventCode": "0x9", + "BriefDescription": "ICache miss" + }, + { + "EventName": "BPU_BR_RETIRE", + "EventCode": "0xA", + "BriefDescription": "condition branch instruction retire" + }, + { + "EventName": "BPU_BR_MISS", + "EventCode": "0xB", + "BriefDescription": "condition branch instruction miss" + }, + { + "EventName": "RET_INS_RETIRE", + "EventCode": "0xC", + "BriefDescription": "return instruction retire" + }, + { + "EventName": "RET_INS_MISS", + "EventCode": "0xD", + "BriefDescription": "return instruction miss" + }, + { + "EventName": "INDIRECT_JR_MISS", + "EventCode": "0xE", + "BriefDescription": "indirect JR instruction miss (inlcude without target)" + }, + { + "EventName": "IBUF_VAL_ID_NORDY", + "EventCode": "0xF", + "BriefDescription": "IBUF valid while ID not ready" + }, + { + "EventName": "IBUF_NOVAL_ID_RDY", + "EventCode": "0x10", + "BriefDescription": "IBUF not valid while ID ready" + }, + { + "EventName": "REN_INT_PHY_REG_NORDY", + "EventCode": "0x11", + "BriefDescription": "REN integer physical register file is not ready" + }, + { + "EventName": "REN_FP_PHY_REG_NORDY", + "EventCode": "0x12", + "BriefDescription": "REN floating point physical register file is not ready" + }, + { + "EventName": "REN_CP_NORDY", + "EventCode": "0x13", + "BriefDescription": "REN checkpoint is not ready" + }, + { + "EventName": "DEC_VAL_ROB_NORDY", + "EventCode": "0x14", + "BriefDescription": "DEC is valid and ROB is not ready" + }, + { + "EventName": "OOD_FLUSH_LS_DEP", + "EventCode": "0x15", + "BriefDescription": "out of order flush due to load/store dependency" + }, + { + "EventName": "BRU_RET_IJR_INS", + "EventCode": "0x16", + "BriefDescription": "BRU retire an IJR instruction" + }, + { + "EventName": "ACCESS_DTLB", + "EventCode": "0x17", + "BriefDescription": "access DTLB" + }, + { + "EventName": "MISS_DTLB", + "EventCode": "0x18", + "BriefDescription": "miss DTLB" + }, + { + "EventName": "LOAD_INS_DCACHE", + "EventCode": "0x19", + "BriefDescription": "load instruction access DCache" + }, + { + "EventName": "LOAD_INS_MISS_DCACHE", + "EventCode": "0x1A", + "BriefDescription": "load instruction miss DCache" + }, + { + "EventName": "STORE_INS_DCACHE", + "EventCode": "0x1B", + "BriefDescription": "store/amo instruction access DCache" + }, + { + "EventName": "STORE_INS_MISS_DCACHE", + "EventCode": "0x1C", + "BriefDescription": "store/amo instruction miss DCache" + }, + { + "EventName": "LOAD_SCACHE", + "EventCode": "0x1D", + "BriefDescription": "load access SCache" + }, + { + "EventName": "STORE_SCACHE", + "EventCode": "0x1E", + "BriefDescription": "store access SCache" + }, + { + "EventName": "LOAD_MISS_SCACHE", + "EventCode": "0x1F", + "BriefDescription": "load miss SCache" + }, + { + "EventName": "STORE_MISS_SCACHE", + "EventCode": "0x20", + "BriefDescription": "store miss SCache" + }, + { + "EventName": "L2C_PF_REQ", + "EventCode": "0x21", + "BriefDescription": "L2C data-prefetcher request" + }, + { + "EventName": "L2C_PF_HIT", + "EventCode": "0x22", + "BriefDescription": "L2C data-prefetcher hit" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json new file mode 100644 index 000000000000..2b142348d635 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json @@ -0,0 +1,67 @@ +[ + { + "EventName": "L1_ICACHE_ACCESS", + "EventCode": "0x00000001", + "BriefDescription": "L1 instruction cache access" + }, + { + "EventName": "L1_ICACHE_MISS", + "EventCode": "0x00000002", + "BriefDescription": "L1 instruction cache miss" + }, + { + "EventName": "ITLB_MISS", + "EventCode": "0x00000003", + "BriefDescription": "I-UTLB miss" + }, + { + "EventName": "DTLB_MISS", + "EventCode": "0x00000004", + "BriefDescription": "D-UTLB miss" + }, + { + "EventName": "JTLB_MISS", + "EventCode": "0x00000005", + "BriefDescription": "JTLB miss" + }, + { + "EventName": "L1_DCACHE_READ_ACCESS", + "EventCode": "0x0000000c", + "BriefDescription": "L1 data cache read access" + }, + { + "EventName": "L1_DCACHE_READ_MISS", + "EventCode": "0x0000000d", + "BriefDescription": "L1 data cache read miss" + }, + { + "EventName": "L1_DCACHE_WRITE_ACCESS", + "EventCode": "0x0000000e", + "BriefDescription": "L1 data cache write access" + }, + { + "EventName": "L1_DCACHE_WRITE_MISS", + "EventCode": "0x0000000f", + "BriefDescription": "L1 data cache write miss" + }, + { + "EventName": "LL_CACHE_READ_ACCESS", + "EventCode": "0x00000010", + "BriefDescription": "LL Cache read access" + }, + { + "EventName": "LL_CACHE_READ_MISS", + "EventCode": "0x00000011", + "BriefDescription": "LL Cache read miss" + }, + { + "EventName": "LL_CACHE_WRITE_ACCESS", + "EventCode": "0x00000012", + "BriefDescription": "LL Cache write access" + }, + { + "EventName": "LL_CACHE_WRITE_MISS", + "EventCode": "0x00000013", + "BriefDescription": "LL Cache write miss" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json new file mode 100644 index 000000000000..c822b5373333 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json @@ -0,0 +1,72 @@ +[ + { + "EventName": "INST_BRANCH_MISPREDICT", + "EventCode": "0x00000006", + "BriefDescription": "Mispredicted branch instructions" + }, + { + "EventName": "INST_BRANCH", + "EventCode": "0x00000007", + "BriefDescription": "Retired branch instructions" + }, + { + "EventName": "INST_JMP_MISPREDICT", + "EventCode": "0x00000008", + "BriefDescription": "Indirect branch mispredict" + }, + { + "EventName": "INST_JMP", + "EventCode": "0x00000009", + "BriefDescription": "Retired jmp instructions" + }, + { + "EventName": "INST_STORE", + "EventCode": "0x0000000b", + "BriefDescription": "Retired store instructions" + }, + { + "EventName": "INST_ALU", + "EventCode": "0x0000001d", + "BriefDescription": "Retired ALU instructions" + }, + { + "EventName": "INST_LDST", + "EventCode": "0x0000001e", + "BriefDescription": "Retired Load/Store instructions" + }, + { + "EventName": "INST_VECTOR", + "EventCode": "0x0000001f", + "BriefDescription": "Retired Vector instructions" + }, + { + "EventName": "INST_CSR", + "EventCode": "0x00000020", + "BriefDescription": "Retired CSR instructions" + }, + { + "EventName": "INST_SYNC", + "EventCode": "0x00000021", + "BriefDescription": "Retired sync instructions (AMO/LR/SC instructions)" + }, + { + "EventName": "INST_UNALIGNED_ACCESS", + "EventCode": "0x00000022", + "BriefDescription": "Retired Store/Load instructions with unaligned memory access" + }, + { + "EventName": "INST_ECALL", + "EventCode": "0x00000025", + "BriefDescription": "Retired ecall instructions" + }, + { + "EventName": "INST_LONG_JP", + "EventCode": "0x00000026", + "BriefDescription": "Retired long jump instructions" + }, + { + "EventName": "INST_FP", + "EventCode": "0x0000002a", + "BriefDescription": "Retired FPU instructions" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json new file mode 100644 index 000000000000..0ab6f288af91 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json @@ -0,0 +1,80 @@ +[ + { + "EventName": "LSU_SPEC_FAIL", + "EventCode": "0x0000000a", + "BriefDescription": "LSU speculation fail" + }, + { + "EventName": "IDU_RF_PIPE_FAIL", + "EventCode": "0x00000014", + "BriefDescription": "Instruction decode unit launch pipeline failed in RF state" + }, + { + "EventName": "IDU_RF_REG_FAIL", + "EventCode": "0x00000015", + "BriefDescription": "Instruction decode unit launch register file fail in RF state" + }, + { + "EventName": "IDU_RF_INSTRUCTION", + "EventCode": "0x00000016", + "BriefDescription": "retired instruction count of Instruction decode unit in RF (Register File) stage" + }, + { + "EventName": "LSU_4K_STALL", + "EventCode": "0x00000017", + "BriefDescription": "LSU stall times for long distance data access (Over 4K)", + "PublicDescription": "This stall occurs when translate virtual address with page offset over 4k" + }, + { + "EventName": "LSU_OTHER_STALL", + "EventCode": "0x00000018", + "BriefDescription": "LSU stall times for other reasons (except the 4k stall)" + }, + { + "EventName": "LSU_SQ_OTHER_DIS", + "EventCode": "0x00000019", + "BriefDescription": "LSU store queue discard others" + }, + { + "EventName": "LSU_SQ_DATA_DISCARD", + "EventCode": "0x0000001a", + "BriefDescription": "LSU store queue discard data (uops)" + }, + { + "EventName": "BRANCH_DIRECTION_MISPREDICTION", + "EventCode": "0x0000001b", + "BriefDescription": "Branch misprediction in BTB" + }, + { + "EventName": "BRANCH_DIRECTION_PREDICTION", + "EventCode": "0x0000001c", + "BriefDescription": "All branch prediction in BTB", + "PublicDescription": "This event including both successful prediction and failed prediction in BTB" + }, + { + "EventName": "INTERRUPT_ACK_COUNT", + "EventCode": "0x00000023", + "BriefDescription": "acknowledged interrupt count" + }, + { + "EventName": "INTERRUPT_OFF_CYCLE", + "EventCode": "0x00000024", + "BriefDescription": "PLIC arbitration time when the interrupt is not responded", + "PublicDescription": "The arbitration time is recorded while meeting any of the following:\n- CPU is M-mode and MIE == 0\n- CPU is S-mode and delegation and SIE == 0\n" + }, + { + "EventName": "IFU_STALLED_CYCLE", + "EventCode": "0x00000027", + "BriefDescription": "Number of stall cycles of the instruction fetch unit (IFU)." + }, + { + "EventName": "IDU_STALLED_CYCLE", + "EventCode": "0x00000028", + "BriefDescription": "hpcp_backend_stall Number of stall cycles of the instruction decoding unit (IDU) and next-level pipeline unit." + }, + { + "EventName": "SYNC_STALL", + "EventCode": "0x00000029", + "BriefDescription": "Sync instruction stall cycle fence/fence.i/sync/sfence" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index 3388b58b8f1a..35124a4ddcb2 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -70,12 +70,6 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Uncore frequency per die [GHZ]", - "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", - "MetricGroup": "SoC", - "MetricName": "UNCORE_FREQ" - }, - { "BriefDescription": "Percentage of cycles spent in System Management Interrupts.", "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)", "MetricGroup": "smi", @@ -810,6 +804,13 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Uncore frequency per die [GHZ]", + "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9", + "MetricGroup": "SoC", + "MetricName": "UNCORE_FREQ", + "Unit": "cpu_core" + }, + { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.", "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)", "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", @@ -1838,7 +1839,7 @@ }, { "BriefDescription": "Average number of parallel data read requests to external memory", - "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@", "MetricGroup": "Mem;MemoryBW;SoC", "MetricName": "tma_info_system_mem_parallel_reads", "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json new file mode 100644 index 000000000000..55263e5e4f69 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json @@ -0,0 +1,101 @@ +[ + { + "EventName": "umc_mem_clk", + "PublicDescription": "Number of memory clock cycles.", + "EventCode": "0x00", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.all", + "PublicDescription": "Number of ACTIVATE commands sent.", + "EventCode": "0x05", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.rd", + "PublicDescription": "Number of ACTIVATE commands sent for reads.", + "EventCode": "0x05", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_act_cmd.wr", + "PublicDescription": "Number of ACTIVATE commands sent for writes.", + "EventCode": "0x05", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.all", + "PublicDescription": "Number of PRECHARGE commands sent.", + "EventCode": "0x06", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.rd", + "PublicDescription": "Number of PRECHARGE commands sent for reads.", + "EventCode": "0x06", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_pchg_cmd.wr", + "PublicDescription": "Number of PRECHARGE commands sent for writes.", + "EventCode": "0x06", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.all", + "PublicDescription": "Number of CAS commands sent.", + "EventCode": "0x0a", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.rd", + "PublicDescription": "Number of CAS commands sent for reads.", + "EventCode": "0x0a", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_cas_cmd.wr", + "PublicDescription": "Number of CAS commands sent for writes.", + "EventCode": "0x0a", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.all", + "PublicDescription": "Number of clocks used by the data bus.", + "EventCode": "0x14", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.rd", + "PublicDescription": "Number of clocks used by the data bus for reads.", + "EventCode": "0x14", + "RdWrMask": "0x1", + "PerPkg": "1", + "Unit": "UMCPMC" + }, + { + "EventName": "umc_data_slot_clks.wr", + "PublicDescription": "Number of clocks used by the data bus for writes.", + "EventCode": "0x14", + "RdWrMask": "0x2", + "PerPkg": "1", + "Unit": "UMCPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json index 5e6a793acf7b..96e06401c6cb 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json @@ -330,5 +330,89 @@ "MetricGroup": "data_fabric", "PerPkg": "1", "ScaleUnit": "6.103515625e-5MiB" + }, + { + "MetricName": "umc_data_bus_utilization", + "BriefDescription": "Memory controller data bus utilization.", + "MetricExpr": "d_ratio(umc_data_slot_clks.all / 2, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_rate", + "BriefDescription": "Memory controller CAS command rate.", + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_cas_cmd_read_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for reads.", + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_write_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for writes.", + "MetricExpr": "d_ratio(umc_cas_cmd.wr, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_mem_read_bandwidth", + "BriefDescription": "Estimated memory read bandwidth.", + "MetricExpr": "(umc_cas_cmd.rd * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_mem_write_bandwidth", + "BriefDescription": "Estimated memory write bandwidth.", + "MetricExpr": "(umc_cas_cmd.wr * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_mem_bandwidth", + "BriefDescription": "Estimated combined memory bandwidth.", + "MetricExpr": "(umc_cas_cmd.all * 64) / 1e6 / duration_time", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "1MB/s" + }, + { + "MetricName": "umc_cas_cmd_read_ratio", + "BriefDescription": "Ratio of memory controller CAS commands for reads.", + "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)", + "MetricGroup": "memory_controller", + "PerPkg": "1", + "ScaleUnit": "100%" + }, + { + "MetricName": "umc_cas_cmd_rate", + "BriefDescription": "Memory controller CAS command rate.", + "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_activate_cmd_rate", + "BriefDescription": "Memory controller ACTIVATE command rate.", + "MetricExpr": "d_ratio(umc_act_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" + }, + { + "MetricName": "umc_precharge_cmd_rate", + "BriefDescription": "Memory controller PRECHARGE command rate.", + "MetricExpr": "d_ratio(umc_pchg_cmd.all * 1000, umc_mem_clk)", + "MetricGroup": "memory_controller", + "PerPkg": "1" } ] diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 84c132af3dfa..8bc6c0707856 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -1863,6 +1863,12 @@ "ScaleUnit": "1GHz" }, { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", "MetricName": "upi_data_transmit_bw", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json index 4a9d211e9d4f..1bdefaf96287 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json @@ -23,27 +23,48 @@ "UMask": "0x10" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", "SampleAfterValue": "2000003", "UMask": "0x1" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", "SampleAfterValue": "2000003", "UMask": "0x2" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", "SampleAfterValue": "2000003", "UMask": "0x4" }, { + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V0", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V2", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json index 6dcf3b763af4..1f8200fb8964 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json @@ -1,21 +1,5 @@ [ { - "BriefDescription": "AMX retired arithmetic BF16 operations.", - "EventCode": "0xce", - "EventName": "AMX_OPS_RETIRED.BF16", - "PublicDescription": "Number of AMX-based retired arithmetic bfloat16 (BF16) floating-point operations. Counts TDPBF16PS FP instructions. SW to use operation multiplier of 4", - "SampleAfterValue": "1000003", - "UMask": "0x2" - }, - { - "BriefDescription": "AMX retired arithmetic integer 8-bit operations.", - "EventCode": "0xce", - "EventName": "AMX_OPS_RETIRED.INT8", - "PublicDescription": "Number of AMX-based retired arithmetic integer operations of 8-bit width source operands. Counts TDPB[SS,UU,US,SU]D instructions. SW should use operation multiplier of 8.", - "SampleAfterValue": "1000003", - "UMask": "0x1" - }, - { "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE", "CounterMask": "1", "Deprecated": "1", @@ -505,7 +489,7 @@ "UMask": "0x1" }, { - "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", + "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).", "EventCode": "0xad", "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", "MSRIndex": "0x3F7", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json index 09d840c7da4c..65d088556bae 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json @@ -4825,11 +4825,11 @@ "Unit": "M3UPI" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC", "PerPkg": "1", - "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress", "UMask": "0x1", "Unit": "MDF" }, @@ -4861,11 +4861,11 @@ "Unit": "MDF" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC", "PerPkg": "1", - "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress", "UMask": "0x4", "Unit": "MDF" }, diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json index 557080b74ee5..0761980c34a0 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json @@ -1186,6 +1186,36 @@ "Unit": "IIO" }, { + "BriefDescription": ": IOTLB Hits to a 1G Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.1G_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.", + "UMask": "0x10", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 2M Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.2M_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.", + "UMask": "0x8", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 4K Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.4K_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": ": Context cache hits", "EventCode": "0x40", "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index e98602c66707..71d78a7841ea 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -1847,6 +1847,12 @@ "ScaleUnit": "1GHz" }, { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", "MetricName": "upi_data_transmit_bw", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/other.json b/tools/perf/pmu-events/arch/x86/icelakex/other.json index 63d5faf2fc43..11810daaf150 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/other.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/other.json @@ -19,7 +19,7 @@ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", "EventCode": "0x28", "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.", "SampleAfterValue": "200003", "UMask": "0x20" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json index 176e5ef2a24a..45ee6bceba7f 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json @@ -519,7 +519,7 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "EventCode": "0x5e", "EventName": "RS_EVENTS.EMPTY_CYCLES", - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)", "SampleAfterValue": "1000003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json index f87ea3f66d1b..a066a009c511 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json @@ -38,7 +38,7 @@ "EventCode": "0x10", "EventName": "UNC_I_COHERENT_OPS.CLFLUSH", "PerPkg": "1", - "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations servied by the IRP", + "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations serviced by the IRP", "UMask": "0x80", "Unit": "IRP" }, @@ -65,7 +65,7 @@ "EventCode": "0x10", "EventName": "UNC_I_COHERENT_OPS.WBMTOI", "PerPkg": "1", - "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations servied by the IRP", + "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations serviced by the IRP", "UMask": "0x40", "Unit": "IRP" }, @@ -454,7 +454,7 @@ "EventCode": "0x11", "EventName": "UNC_I_TRANSACTIONS.WRITES", "PerPkg": "1", - "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.", + "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.", "UMask": "0x2", "Unit": "IRP" }, diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index e571683f59f3..4d1deed4437a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -7,7 +7,7 @@ GenuineIntel-6-56,v11,broadwellde,core GenuineIntel-6-4F,v22,broadwellx,core GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core GenuineIntel-6-9[6C],v1.04,elkhartlake,core -GenuineIntel-6-CF,v1.01,emeraldrapids,core +GenuineIntel-6-CF,v1.02,emeraldrapids,core GenuineIntel-6-5[CF],v13,goldmont,core GenuineIntel-6-7A,v1.01,goldmontplus,core GenuineIntel-6-B6,v1.00,grandridge,core @@ -15,7 +15,7 @@ GenuineIntel-6-A[DE],v1.01,graniterapids,core GenuineIntel-6-(3C|45|46),v33,haswell,core GenuineIntel-6-3F,v28,haswellx,core GenuineIntel-6-7[DE],v1.19,icelake,core -GenuineIntel-6-6[AC],v1.21,icelakex,core +GenuineIntel-6-6[AC],v1.23,icelakex,core GenuineIntel-6-3A,v24,ivybridge,core GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core @@ -26,7 +26,7 @@ GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core GenuineIntel-6-A7,v1.01,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core -GenuineIntel-6-8F,v1.16,sapphirerapids,core +GenuineIntel-6-8F,v1.17,sapphirerapids,core GenuineIntel-6-AF,v1.00,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json index 0c880e415669..27433fc15ede 100644 --- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json @@ -985,7 +985,7 @@ }, { "BriefDescription": "Average number of parallel data read requests to external memory", - "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@", + "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@", "MetricGroup": "Mem;MemoryBW;SoC", "MetricName": "tma_info_system_mem_parallel_reads", "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches" diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json index 4a9d211e9d4f..1bdefaf96287 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json @@ -23,27 +23,48 @@ "UMask": "0x10" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", "SampleAfterValue": "2000003", "UMask": "0x1" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", "SampleAfterValue": "2000003", "UMask": "0x2" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", "SampleAfterValue": "2000003", "UMask": "0x4" }, { + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V0", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V2", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json index 6dcf3b763af4..2cfe814d2015 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json @@ -505,7 +505,7 @@ "UMask": "0x1" }, { - "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", + "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).", "EventCode": "0xad", "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", "MSRIndex": "0x3F7", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index 06c6d67cb76b..e31a4aac9f20 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -1965,6 +1965,12 @@ "ScaleUnit": "1GHz" }, { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", "MetricName": "upi_data_transmit_bw", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json index 09d840c7da4c..65d088556bae 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json @@ -4825,11 +4825,11 @@ "Unit": "M3UPI" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC", "PerPkg": "1", - "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress", "UMask": "0x1", "Unit": "MDF" }, @@ -4861,11 +4861,11 @@ "Unit": "MDF" }, { - "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)", + "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)", "EventCode": "0x47", "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC", "PerPkg": "1", - "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress", + "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress", "UMask": "0x4", "Unit": "MDF" }, diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json index 8b5f54fed103..03596db87710 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json @@ -1250,6 +1250,36 @@ "Unit": "IIO" }, { + "BriefDescription": ": IOTLB Hits to a 1G Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.1G_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.", + "UMask": "0x10", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 2M Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.2M_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.", + "UMask": "0x8", + "Unit": "IIO" + }, + { + "BriefDescription": ": IOTLB Hits to a 4K Page", + "EventCode": "0x40", + "EventName": "UNC_IIO_IOMMU0.4K_HITS", + "PerPkg": "1", + "PortMask": "0x0000", + "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": ": Context cache hits", "EventCode": "0x40", "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 4a8f8eeb7525..ec3aa5ef00a3 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1807,6 +1807,12 @@ "ScaleUnit": "1GHz" }, { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", "MetricName": "upi_data_transmit_bw", diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 3c091ab75305..53ab050c8fa4 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -83,7 +83,7 @@ def c_len(s: str) -> int: """Return the length of s a C string This doesn't handle all escape characters properly. It first assumes - all \ are for escaping, it then adjusts as it will have over counted + all \\ are for escaping, it then adjusts as it will have over counted \\. The code uses \000 rather than \0 as a terminator as an adjacent number would be folded into a string of \0 (ie. "\0" + "5" doesn't equal a terminator followed by the number 5 but the escape of @@ -286,6 +286,7 @@ class JsonEvent: 'imx8_ddr': 'imx8_ddr', 'L3PMC': 'amd_l3', 'DFPMC': 'amd_df', + 'UMCPMC': 'amd_umc', 'cpu_core': 'cpu_core', 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', @@ -354,6 +355,7 @@ class JsonEvent: ('SampleAfterValue', 'period='), ('UMask', 'umask='), ('NodeType', 'type='), + ('RdWrMask', 'rdwrmask='), ] for key, value in event_fields: if key in jd and jd[key] != '0': diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index d59ff53f1d94..d973c2baed1c 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -45,8 +45,8 @@ parser = OptionParser(option_list=option_list) # Initialize global dicts and regular expression disasm_cache = dict() cpu_data = dict() -disasm_re = re.compile("^\s*([0-9a-fA-F]+):") -disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") +disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") cache_size = 64*1024 glb_source_file_name = None @@ -188,6 +188,17 @@ def process_event(param_dict): dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + cpu = sample["cpu"] + ip = sample["ip"] + addr = sample["addr"] + + # Initialize CPU data if it's empty, and directly return back + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = addr + return + + if (options.verbose == True): print("Event type: %s" % name) print_sample(sample) @@ -209,16 +220,6 @@ def process_event(param_dict): if (name[0:8] != "branches"): return - cpu = sample["cpu"] - ip = sample["ip"] - addr = sample["addr"] - - # Initialize CPU data if it's empty, and directly return back - # if this is the first tracing event for this CPU. - if (cpu_data.get(str(cpu) + 'addr') == None): - cpu_data[str(cpu) + 'addr'] = addr - return - # The format for packet is: # # +------------+------------+------------+ @@ -258,8 +259,9 @@ def process_event(param_dict): if (options.objdump_name != None): # It doesn't need to decrease virtual memory offset for disassembly - # for kernel dso, so in this case we set vm_start to zero. - if (dso == "[kernel.kallsyms]"): + # for kernel dso and executable file dso, so in this case we set + # vm_start to zero. + if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 else: dso_vm_start = int(dso_start) diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py index 2560a042dc6f..9401f7c14747 100644 --- a/tools/perf/scripts/python/compaction-times.py +++ b/tools/perf/scripts/python/compaction-times.py @@ -260,7 +260,7 @@ def pr_help(): comm_re = None pid_re = None -pid_regex = "^(\d*)-(\d*)$|^(\d*)$" +pid_regex = r"^(\d*)-(\d*)$|^(\d*)$" opt_proc = popt.DISP_DFL opt_disp = topt.DISP_ALL diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 13f2d8a81610..121cf61ba1b3 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -677,8 +677,8 @@ class CallGraphModelBase(TreeModel): # sqlite supports GLOB (text only) which uses * and ? and is case sensitive if not self.glb.dbref.is_sqlite3: # Escape % and _ - s = value.replace("%", "\%") - s = s.replace("_", "\_") + s = value.replace("%", "\\%") + s = s.replace("_", "\\_") # Translate * and ? into SQL LIKE pattern characters % and _ trans = string.maketrans("*?", "%_") match = " LIKE '" + str(s).translate(trans) + "'" diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 2b45ffa462a6..53ba9c3e20e0 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -77,3 +77,17 @@ CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUI CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls perf-y += workloads/ + +ifdef SHELLCHECK + SHELL_TESTS := $(shell find tests/shell -executable -type f -name '*.sh') + TEST_LOGS := $(SHELL_TESTS:tests/shell/%=shell/%.shellcheck_log) +else + SHELL_TESTS := + TEST_LOGS := +endif + +$(OUTPUT)%.shellcheck_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + +perf-y += $(TEST_LOGS) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 61186d0d1cfa..97e1bdd6ec0e 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -188,7 +188,7 @@ static int test__attr(struct test_suite *test __maybe_unused, int subtest __mayb if (perf_pmus__num_core_pmus() > 1) { /* * TODO: Attribute tests hard code the PMU type. If there are >1 - * core PMU then each PMU will have a different type whic + * core PMU then each PMU will have a different type which * requires additional support. */ pr_debug("Skip test on hybrid systems"); diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 27c21271a16c..b44e4e6e4443 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -6,7 +6,7 @@ flags=0|8 cpu=* type=0|1 size=136 -config=0 +config=0|1 sample_period=* sample_type=263 read_format=0|4|20 diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 index fbb065842880..bed765450ca9 100644 --- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 +++ b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 @@ -6,4 +6,4 @@ args = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1 ret = 129 test_ret = true arch = aarch64 -auxv = auxv["AT_HWCAP"] & 0x200000 == 0 +auxv = auxv["AT_HWCAP"] & 0x400000 == 0 diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 index c598c803221d..a65113cd7311 100644 --- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 +++ b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 @@ -6,7 +6,7 @@ args = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1 ret = 1 test_ret = true arch = aarch64 -auxv = auxv["AT_HWCAP"] & 0x200000 == 0x200000 +auxv = auxv["AT_HWCAP"] & 0x400000 == 0x400000 kernel_since = 6.1 [event:base-record] diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cb6f1dd00dc4..4a5973f9bb9b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,6 +14,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include "builtin.h" +#include "config.h" #include "hist.h" #include "intlist.h" #include "tests.h" @@ -32,6 +33,7 @@ static bool dont_fork; const char *dso_to_test; +const char *test_objdump_path = "objdump"; /* * List of architecture specific tests. Not a weak symbol as the array length is @@ -60,8 +62,6 @@ static struct test_suite *generic_tests[] = { &suite__pmu, &suite__pmu_events, &suite__dso_data, - &suite__dso_data_cache, - &suite__dso_data_reopen, &suite__perf_evsel__roundtrip_name_test, #ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, @@ -513,6 +513,15 @@ static int run_workload(const char *work, int argc, const char **argv) return -1; } +static int perf_test__config(const char *var, const char *value, + void *data __maybe_unused) +{ + if (!strcmp(var, "annotate.objdump")) + test_objdump_path = value; + + return 0; +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -529,6 +538,8 @@ int cmd_test(int argc, const char **argv) "Do not fork for testcase"), OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), + OPT_STRING(0, "objdump", &test_objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; @@ -538,6 +549,8 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + perf_config(perf_test__config, NULL); + /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 3af81012014e..7a3a7bbbec71 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -185,7 +185,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, int ret; fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; - ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, + ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, addr + len, filename); if (ret <= 0 || (size_t)ret >= sizeof(cmd)) return -1; @@ -511,38 +511,6 @@ static void fs_something(void) } } -#ifdef __s390x__ -#include "header.h" // for get_cpuid() -#endif - -static const char *do_determine_event(bool excl_kernel) -{ - const char *event = excl_kernel ? "cycles:u" : "cycles"; - -#ifdef __s390x__ - char cpuid[128], model[16], model_c[16], cpum_cf_v[16]; - unsigned int family; - int ret, cpum_cf_a; - - if (get_cpuid(cpuid, sizeof(cpuid))) - goto out_clocks; - ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c, - model, cpum_cf_v, &cpum_cf_a); - if (ret != 5) /* Not available */ - goto out_clocks; - if (excl_kernel && (cpum_cf_a & 4)) - return event; - if (!excl_kernel && (cpum_cf_a & 2)) - return event; - - /* Fall through: missing authorization */ -out_clocks: - event = excl_kernel ? "cpu-clock:u" : "cpu-clock"; - -#endif - return event; -} - static void do_something(void) { fs_something(); @@ -583,8 +551,10 @@ static int do_test_code_reading(bool try_kcore) int err = -1, ret; pid_t pid; struct map *map; - bool have_vmlinux, have_kcore, excl_kernel = false; + bool have_vmlinux, have_kcore; struct dso *dso; + const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL }; + int evidx = 0; pid = getpid(); @@ -618,7 +588,7 @@ static int do_test_code_reading(bool try_kcore) /* No point getting kernel events if there is no kernel object */ if (!have_vmlinux && !have_kcore) - excl_kernel = true; + evidx++; threads = thread_map__new_by_tid(pid); if (!threads) { @@ -640,13 +610,13 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { pr_debug("perf_cpu_map__new failed\n"); goto out_put; } - while (1) { + while (events[evidx]) { const char *str; evlist = evlist__new(); @@ -657,7 +627,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__set_maps(&evlist->core, cpus, threads); - str = do_determine_event(excl_kernel); + str = events[evidx]; pr_debug("Parsing event '%s'\n", str); ret = parse_event(evlist, str); if (ret < 0) { @@ -675,32 +645,32 @@ static int do_test_code_reading(bool try_kcore) ret = evlist__open(evlist); if (ret < 0) { - if (!excl_kernel) { - excl_kernel = true; - /* - * Both cpus and threads are now owned by evlist - * and will be freed by following perf_evlist__set_maps - * call. Getting reference to keep them alive. - */ - perf_cpu_map__get(cpus); - perf_thread_map__get(threads); - perf_evlist__set_maps(&evlist->core, NULL, NULL); - evlist__delete(evlist); - evlist = NULL; - continue; - } + evidx++; - if (verbose > 0) { + if (events[evidx] == NULL && verbose > 0) { char errbuf[512]; evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); } - goto out_put; + /* + * Both cpus and threads are now owned by evlist + * and will be freed by following perf_evlist__set_maps + * call. Getting reference to keep them alive. + */ + perf_cpu_map__get(cpus); + perf_thread_map__get(threads); + perf_evlist__set_maps(&evlist->core, NULL, NULL); + evlist__delete(evlist); + evlist = NULL; + continue; } break; } + if (events[evidx] == NULL) + goto out_put; + ret = evlist__mmap(evlist, UINT_MAX); if (ret < 0) { pr_debug("evlist__mmap failed\n"); @@ -721,7 +691,7 @@ static int do_test_code_reading(bool try_kcore) err = TEST_CODE_READING_NO_KERNEL_OBJ; else if (!have_vmlinux && !try_kcore) err = TEST_CODE_READING_NO_VMLINUX; - else if (excl_kernel) + else if (strstr(events[evidx], ":u")) err = TEST_CODE_READING_NO_ACCESS; else err = TEST_CODE_READING_OK; diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7730fc2ab40b..bd8e396f3e57 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -213,7 +213,7 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused, static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - struct perf_cpu_map *any = perf_cpu_map__dummy_new(); + struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); struct perf_cpu_map *one = perf_cpu_map__new("1"); struct perf_cpu_map *two = perf_cpu_map__new("2"); struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3419a4ab5590..2d67422c1222 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -394,6 +394,15 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub return 0; } -DEFINE_SUITE("DSO data read", dso_data); -DEFINE_SUITE("DSO data cache", dso_data_cache); -DEFINE_SUITE("DSO data reopen", dso_data_reopen); + +static struct test_case tests__dso_data[] = { + TEST_CASE("read", dso_data), + TEST_CASE("cache", dso_data_cache), + TEST_CASE("reopen", dso_data_reopen), + { .name = NULL, } +}; + +struct test_suite suite__dso_data = { + .desc = "DSO data tests", + .test_cases = tests__dso_data, +}; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 8f4f9b632e1e..5a3b2bed07f3 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -81,7 +81,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); CHECK_NOT_NULL__(cpus); evlist = evlist__new(); diff --git a/tools/perf/tests/make b/tools/perf/tests/make index d9945ed25bc5..8a4da7eb637a 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -183,7 +183,7 @@ run += make_install_prefix_slash # run += make_install_pdf run += make_minimal -old_libbpf := $(shell echo '\#include <bpf/libbpf.h>' | $(CC) -E -dM -x c -| egrep -q "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") +old_libbpf := $(shell echo '\#include <bpf/libbpf.h>' | $(CC) -E -dM -x c -| grep -q -E "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") ifneq ($(old_libbpf),) run += make_libbpf_dynamic diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c index 5bb1123a91a7..bb3fbfe5a73e 100644 --- a/tools/perf/tests/maps.c +++ b/tools/perf/tests/maps.c @@ -14,44 +14,59 @@ struct map_def { u64 end; }; +struct check_maps_cb_args { + struct map_def *merged; + unsigned int i; +}; + +static int check_maps_cb(struct map *map, void *data) +{ + struct check_maps_cb_args *args = data; + struct map_def *merged = &args->merged[args->i]; + + if (map__start(map) != merged->start || + map__end(map) != merged->end || + strcmp(map__dso(map)->name, merged->name) || + refcount_read(map__refcnt(map)) != 1) { + return 1; + } + args->i++; + return 0; +} + +static int failed_cb(struct map *map, void *data __maybe_unused) +{ + pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", + map__start(map), + map__end(map), + map__dso(map)->name, + refcount_read(map__refcnt(map))); + + return 0; +} + static int check_maps(struct map_def *merged, unsigned int size, struct maps *maps) { - struct map_rb_node *rb_node; - unsigned int i = 0; bool failed = false; if (maps__nr_maps(maps) != size) { pr_debug("Expected %d maps, got %d", size, maps__nr_maps(maps)); failed = true; } else { - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - - if (map__start(map) != merged[i].start || - map__end(map) != merged[i].end || - strcmp(map__dso(map)->name, merged[i].name) || - refcount_read(map__refcnt(map)) != 1) { - failed = true; - } - i++; - } + struct check_maps_cb_args args = { + .merged = merged, + .i = 0, + }; + failed = maps__for_each_map(maps, check_maps_cb, &args); } if (failed) { pr_debug("Expected:\n"); - for (i = 0; i < size; i++) { + for (unsigned int i = 0; i < size; i++) { pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: 1\n", merged[i].start, merged[i].end, merged[i].name); } pr_debug("Got:\n"); - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - - pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", - map__start(map), - map__end(map), - map__dso(map)->name, - refcount_read(map__refcnt(map))); - } + maps__for_each_map(maps, failed_cb, NULL); } return failed ? TEST_FAIL : TEST_OK; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 886a13a77a16..012c8ae439fd 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -52,7 +52,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest return -1; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (cpus == NULL) { pr_debug("perf_cpu_map__new\n"); goto out_free_threads; diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index f3275be83a33..fb114118c876 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -37,7 +37,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb return -1; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (cpus == NULL) { pr_debug("perf_cpu_map__new\n"); goto out_thread_map_delete; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f78be21a5999..fbdf710d5eea 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -162,6 +162,22 @@ static int test__checkevent_numeric(struct evlist *evlist) return TEST_OK; } + +static int assert_hw(struct perf_evsel *evsel, enum perf_hw_id id, const char *name) +{ + struct perf_pmu *pmu; + + if (evsel->attr.type == PERF_TYPE_HARDWARE) { + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id)); + return 0; + } + pmu = perf_pmus__find_by_type(evsel->attr.type); + + TEST_ASSERT_VAL("unexpected PMU type", pmu); + TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name)); + return 0; +} + static int test__checkevent_symbolic_name(struct evlist *evlist) { struct perf_evsel *evsel; @@ -169,10 +185,12 @@ static int test__checkevent_symbolic_name(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", - test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + + if (ret) + return ret; } + return TEST_OK; } @@ -183,8 +201,10 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; /* * The period value gets configured within evlist__config, * while this test executes only parse events method. @@ -861,10 +881,14 @@ static int test__group1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions:k */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -878,8 +902,10 @@ static int test__group1(struct evlist *evlist) /* cycles:upp */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -907,6 +933,8 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist)); evlist__for_each_entry(evlist, evsel) { + int ret; + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) { /* faults + :ku modifier */ leader = evsel; @@ -939,8 +967,10 @@ static int test__group2(struct evlist *evlist) continue; } /* cycles:k */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -957,6 +987,7 @@ static int test__group2(struct evlist *evlist) static int test__group3(struct evlist *evlist __maybe_unused) { struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2)); @@ -1045,8 +1076,10 @@ static int test__group3(struct evlist *evlist __maybe_unused) continue; } /* instructions:u */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1070,10 +1103,14 @@ static int test__group4(struct evlist *evlist __maybe_unused) num_core_entries() == evlist__nr_groups(evlist)); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:u + p */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1089,8 +1126,10 @@ static int test__group4(struct evlist *evlist __maybe_unused) /* instructions:kp + p */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1108,6 +1147,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) static int test__group5(struct evlist *evlist __maybe_unused) { struct evsel *evsel = NULL, *leader; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (5 * num_core_entries())); @@ -1117,8 +1157,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles + G */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1133,8 +1175,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions + G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1148,8 +1192,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles:G */ evsel = leader = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1164,8 +1210,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions:G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1178,8 +1226,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1201,10 +1251,14 @@ static int test__group_gh1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :H group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1218,8 +1272,10 @@ static int test__group_gh1(struct evlist *evlist) /* cache-misses:G + :H group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1242,10 +1298,14 @@ static int test__group_gh2(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :G group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1259,8 +1319,10 @@ static int test__group_gh2(struct evlist *evlist) /* cache-misses:H + :G group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1283,10 +1345,14 @@ static int test__group_gh3(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :u group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1300,8 +1366,10 @@ static int test__group_gh3(struct evlist *evlist) /* cache-misses:H + :u group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1324,10 +1392,14 @@ static int test__group_gh4(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :uG group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1341,8 +1413,10 @@ static int test__group_gh4(struct evlist *evlist) /* cache-misses:H + :uG group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1363,10 +1437,14 @@ static int test__leader_sample1(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1379,8 +1457,10 @@ static int test__leader_sample1(struct evlist *evlist) /* cache-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1392,8 +1472,10 @@ static int test__leader_sample1(struct evlist *evlist) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1415,10 +1497,14 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) evlist->core.nr_entries == (2 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1431,8 +1517,10 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1472,10 +1560,14 @@ static int test__pinned_group(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. */ @@ -1484,13 +1576,18 @@ static int test__pinned_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); } return TEST_OK; @@ -1517,10 +1614,14 @@ static int test__exclusive_group(struct evlist *evlist) evlist->core.nr_entries == 3 * num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. */ @@ -1529,13 +1630,18 @@ static int test__exclusive_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); } return TEST_OK; @@ -1677,9 +1783,11 @@ static int test__checkevent_raw_pmu(struct evlist *evlist) static int test__sym_event_slash(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); return TEST_OK; } @@ -1687,9 +1795,11 @@ static int test__sym_event_slash(struct evlist *evlist) static int test__sym_event_dc(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); return TEST_OK; } @@ -1697,9 +1807,11 @@ static int test__sym_event_dc(struct evlist *evlist) static int test__term_equal_term(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0); return TEST_OK; } @@ -1707,9 +1819,11 @@ static int test__term_equal_term(struct evlist *evlist) static int test__term_equal_legacy(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0); return TEST_OK; } @@ -2549,7 +2663,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest if (strchr(ent->d_name, '.')) continue; - /* exclude parametrized ones (name contains '?') */ + /* exclude parameterized ones (name contains '?') */ n = snprintf(pmu_event, sizeof(pmu_event), "%s%s", path, ent->d_name); if (n >= PATH_MAX) { pr_err("pmu event name crossed PATH_MAX(%d) size\n", PATH_MAX); @@ -2578,7 +2692,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest fclose(file); if (is_event_parameterized == 1) { - pr_debug("skipping parametrized PMU event: %s which contains ?\n", pmu_event); + pr_debug("skipping parameterized PMU event: %s which contains ?\n", pmu_event); continue; } diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index efcd71c2738a..bbe2ddeb9b74 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -93,7 +93,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); CHECK_NOT_NULL__(cpus); evlist = evlist__new(); diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c index a7e169d1bf64..5f886cd09e6b 100644 --- a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c +++ b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c @@ -42,7 +42,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned long i, len, size, thr; - pthread_t threads[256]; struct args args[256]; long long v; diff --git a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c index c0158fac7d0b..e05a559253ca 100644 --- a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c +++ b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c @@ -57,7 +57,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned int i, len, thr; - pthread_t threads[256]; struct args args[256]; if (argc < 3) { diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c index 8f6d384208ed..0fc7bf1a25af 100644 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c @@ -51,7 +51,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned int i, thr; - pthread_t threads[256]; struct args args[256]; if (argc < 2) { diff --git a/tools/perf/tests/shell/diff.sh b/tools/perf/tests/shell/diff.sh new file mode 100755 index 000000000000..14b87af88703 --- /dev/null +++ b/tools/perf/tests/shell/diff.sh @@ -0,0 +1,108 @@ +#!/bin/sh +# perf diff tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata1=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +perfdata2=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +perfdata3=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +testprog="perf test -w thloop" + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +testsym="test_loop" + +skip_test_missing_symbol ${testsym} + +cleanup() { + rm -rf "${perfdata1}" + rm -rf "${perfdata1}".old + rm -rf "${perfdata2}" + rm -rf "${perfdata2}".old + rm -rf "${perfdata3}" + rm -rf "${perfdata3}".old + + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +make_data() { + file="$1" + if ! perf record -o "${file}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record]" + echo 1 + return + fi + if ! perf report -i "${file}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed missing output]" + echo 1 + return + fi + echo 0 +} + +test_two_files() { + echo "Basic two file diff test" + err=$(make_data "${perfdata1}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata2}") + if [ $err != 0 ] + then + return + fi + + if ! perf diff "${perfdata1}" "${perfdata2}" | grep -q "${testsym}" + then + echo "Basic two file diff test [Failed diff]" + err=1 + return + fi + echo "Basic two file diff test [Success]" +} + +test_three_files() { + echo "Basic three file diff test" + err=$(make_data "${perfdata1}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata2}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata3}") + if [ $err != 0 ] + then + return + fi + + if ! perf diff "${perfdata1}" "${perfdata2}" "${perfdata3}" | grep -q "${testsym}" + then + echo "Basic three file diff test [Failed diff]" + err=1 + return + fi + echo "Basic three file diff test [Success]" +} + +test_two_files +test_three_files + +cleanup +exit $err diff --git a/tools/perf/tests/shell/lib/perf_has_symbol.sh b/tools/perf/tests/shell/lib/perf_has_symbol.sh new file mode 100644 index 000000000000..5d59c32ae3e7 --- /dev/null +++ b/tools/perf/tests/shell/lib/perf_has_symbol.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +perf_has_symbol() +{ + if perf test -vv "Symbols" 2>&1 | grep "[[:space:]]$1$"; then + echo "perf does have symbol '$1'" + return 0 + fi + echo "perf does not have symbol '$1'" + return 1 +} + +skip_test_missing_symbol() +{ + if ! perf_has_symbol "$1" ; then + echo "perf is missing symbols - skipping test" + exit 2 + fi + return 0 +} diff --git a/tools/perf/tests/shell/lib/setup_python.sh b/tools/perf/tests/shell/lib/setup_python.sh new file mode 100644 index 000000000000..c2fce1793538 --- /dev/null +++ b/tools/perf/tests/shell/lib/setup_python.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +if [ "x$PYTHON" = "x" ] +then + python3 --version >/dev/null 2>&1 && PYTHON=python3 +fi +if [ "x$PYTHON" = "x" ] +then + python --version >/dev/null 2>&1 && PYTHON=python +fi +if [ "x$PYTHON" = "x" ] +then + echo Skipping test, python not detected please set environment variable PYTHON. + exit 2 +fi diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh new file mode 100755 index 000000000000..22b004f2b23e --- /dev/null +++ b/tools/perf/tests/shell/list.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# perf list tests +# SPDX-License-Identifier: GPL-2.0 + +set -e +err=0 + +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh + +test_list_json() { + echo "Json output test" + perf list -j | $PYTHON -m json.tool + echo "Json output test [Success]" +} + +test_list_json +exit $err diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index 8dd115dd35a7..a78d35d2cff0 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -2,10 +2,17 @@ # perf pipe recording and injection test # SPDX-License-Identifier: GPL-2.0 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + data=$(mktemp /tmp/perf.data.XXXXXX) prog="perf test -w noploop" task="perf" -sym="noploop" if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then echo "cannot find the test file in the perf report" diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index eebeea6bdc76..72c65570db37 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -45,7 +45,10 @@ trace_libc_inet_pton_backtrace() { ;; ppc64|ppc64le) eventattr='max-stack=4' - echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + # Add gaih_inet to expected backtrace only if it is part of libc. + if nm $libc | grep -F -q gaih_inet.; then + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + fi echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 29443b8e8876..3d1a7759a7b2 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -8,10 +8,19 @@ shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh . "${shelldir}"/lib/waiting.sh +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +testsym="test_loop" + +skip_test_missing_symbol ${testsym} + err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog="perf test -w thloop" -testsym="test_loop" +cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" +br_cntr_file="/caps/branch_counter_nr" +br_cntr_output="branch stack counters" cleanup() { rm -rf "${perfdata}" @@ -155,10 +164,37 @@ test_workload() { echo "Basic target workload test [Success]" } +test_branch_counter() { + echo "Basic branch counter test" + # Check if the branch counter feature is supported + for dir in $cpu_pmu_dir + do + if [ ! -e "$dir$br_cntr_file" ] + then + echo "branch counter feature not supported on all core PMUs ($dir) [Skipped]" + return + fi + done + if ! perf record -o "${perfdata}" -j any,counter ${testprog} 2> /dev/null + then + echo "Basic branch counter test [Failed record]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output" + then + echo "Basic branch record test [Failed missing output]" + err=1 + return + fi + echo "Basic branch counter test [Success]" +} + test_per_thread test_register_capture test_system_wide test_workload +test_branch_counter cleanup exit $err diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index a1ef8f0d2b5c..67c925f3a15a 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -77,9 +77,9 @@ test_offcpu_child() { err=1 return fi - # each process waits for read and write, so it should be more than 800 events + # each process waits at least for poll, so it should be more than 400 events if ! perf report -i ${perfdata} -s comm -q -n -t ';' --percent-limit=90 | \ - awk -F ";" '{ if (NF > 3 && int($3) < 800) exit 1; }' + awk -F ";" '{ if (NF > 3 && int($3) < 400) exit 1; }' then echo "Child task off-cpu test [Failed invalid output]" err=1 diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh new file mode 100755 index 000000000000..5ae7bd0031a8 --- /dev/null +++ b/tools/perf/tests/shell/script.sh @@ -0,0 +1,66 @@ +#!/bin/sh +# perf script tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +temp_dir=$(mktemp -d /tmp/perf-test-script.XXXXXXXXXX) + +perfdatafile="${temp_dir}/perf.data" +db_test="${temp_dir}/db_test.py" + +err=0 + +cleanup() +{ + trap - EXIT TERM INT + sane=$(echo "${temp_dir}" | cut -b 1-21) + if [ "${sane}" = "/tmp/perf-test-script" ] ; then + echo "--- Cleaning up ---" + rm -f "${temp_dir}/"* + rmdir "${temp_dir}" + fi +} + +trap_cleanup() +{ + cleanup + exit 1 +} + +trap trap_cleanup EXIT TERM INT + + +test_db() +{ + echo "DB test" + + # Check if python script is supported + libpython=$(perf version --build-options | grep python | grep -cv OFF) + if [ "${libpython}" != "1" ] ; then + echo "SKIP: python scripting is not supported" + err=2 + return + fi + + cat << "_end_of_file_" > "${db_test}" +perf_db_export_mode = True +perf_db_export_calls = False +perf_db_export_callchains = True + +def sample_table(*args): + print(f'sample_table({args})') + +def call_path_table(*args): + print(f'call_path_table({args}') +_end_of_file_ + perf record -g -o "${perfdatafile}" true + perf script -i "${perfdatafile}" -s "${db_test}" + echo "DB test [Success]" +} + +test_db + +cleanup + +exit $err diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh index 196e22672c50..3bc900533a5d 100755 --- a/tools/perf/tests/shell/stat+json_output.sh +++ b/tools/perf/tests/shell/stat+json_output.sh @@ -8,20 +8,10 @@ set -e skip_test=0 +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - elif which python > /dev/null - then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX) diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index c77955419173..d2a3506e0d19 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -4,7 +4,7 @@ set -e -# Test all PMU events; however exclude parametrized ones (name contains '?') +# Test all PMU events; however exclude parameterized ones (name contains '?') for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh index ad94c936de7e..7ca172599aa6 100755 --- a/tools/perf/tests/shell/stat_metrics_values.sh +++ b/tools/perf/tests/shell/stat_metrics_values.sh @@ -1,16 +1,10 @@ #!/bin/bash # perf metrics value validation # SPDX-License-Identifier: GPL-2.0 -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - else - echo Skipping test, python3 not detected please set environment variable PYTHON. - exit 2 - fi -fi + +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index 66dfdfdad553..e342e6c8aa50 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -2,8 +2,14 @@ # Check Arm64 callgraphs are complete in fp mode # SPDX-License-Identifier: GPL-2.0 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + lscpu | grep -q "aarch64" || exit 2 +skip_test_missing_symbol leafloop + PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) TEST_PROGRAM="perf test -w leafloop" diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 09908d71c994..5f14d0cb013f 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -4,6 +4,10 @@ # SPDX-License-Identifier: GPL-2.0 # German Gomez <german.gomez@arm.com>, 2022 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + # skip the test if the hardware doesn't support branch stack sampling # and if the architecture doesn't support filter types: any,save_type,u if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then @@ -11,6 +15,8 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev exit 2 fi +skip_test_missing_symbol brstack_bench + TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) TESTPROG="perf test -w brstack" diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index 69bb6fe86c50..3dfa91832aa8 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -4,6 +4,13 @@ # SPDX-License-Identifier: GPL-2.0 # Leo Yan <leo.yan@linaro.org>, 2022 +shelldir=$(dirname "$0") +# shellcheck source=lib/waiting.sh +. "${shelldir}"/lib/waiting.sh + +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + skip_if_no_mem_event() { perf mem record -e list 2>&1 | grep -E -q 'available' && return 0 return 2 @@ -11,8 +18,11 @@ skip_if_no_mem_event() { skip_if_no_mem_event || exit 2 +skip_test_missing_symbol buf1 + TEST_PROGRAM="perf test -w datasym" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +ERR_FILE=$(mktemp /tmp/__perf_test.stderr.XXXXX) check_result() { # The memory report format is as below: @@ -50,13 +60,15 @@ echo "Recording workload..." # specific CPU and test in per-CPU mode. is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo) if (($is_amd >= 1)); then - perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM & + perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}" & else - perf mem record --all-user -o ${PERF_DATA} -- $TEST_PROGRAM & + perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}" & fi PERFPID=$! +wait_for_perf_to_start ${PERFPID} "${ERR_FILE}" + sleep 1 kill $PERFPID diff --git a/tools/perf/tests/shell/test_perf_data_converter_json.sh b/tools/perf/tests/shell/test_perf_data_converter_json.sh index 6ded58f98f55..c4f1b59d116f 100755 --- a/tools/perf/tests/shell/test_perf_data_converter_json.sh +++ b/tools/perf/tests/shell/test_perf_data_converter_json.sh @@ -6,16 +6,9 @@ set -e err=0 -if [ "$PYTHON" = "" ] ; then - if which python3 > /dev/null ; then - PYTHON=python3 - elif which python > /dev/null ; then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) result=$(mktemp /tmp/__perf_test.output.json.XXXXX) diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index 1de7478ec189..e6fd934b027a 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -57,36 +57,79 @@ static struct perf_event_attr make_event_attr(void) #ifdef HAVE_BPF_SKEL #include <bpf/btf.h> -static bool attr_has_sigtrap(void) +static struct btf *btf; + +static bool btf__available(void) { - bool ret = false; - struct btf *btf; - const struct btf_type *t; + if (btf == NULL) + btf = btf__load_vmlinux_btf(); + + return btf != NULL; +} + +static void btf__exit(void) +{ + btf__free(btf); + btf = NULL; +} + +static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name) +{ + const struct btf_type *t = btf__type_by_id(btf, type_id); const struct btf_member *m; - const char *name; - int i, id; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + const char *current_member_name = btf__name_by_offset(btf, m->name_off); + if (!strcmp(current_member_name, member_name)) + return m; + } - btf = btf__load_vmlinux_btf(); - if (btf == NULL) { + return NULL; +} + +static bool attr_has_sigtrap(void) +{ + int id; + + if (!btf__available()) { /* should be an old kernel */ return false; } id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT); if (id < 0) - goto out; + return false; - t = btf__type_by_id(btf, id); - for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { - name = btf__name_by_offset(btf, m->name_off); - if (!strcmp(name, "sigtrap")) { - ret = true; - break; - } - } -out: - btf__free(btf); - return ret; + return __btf_type__find_member_by_name(id, "sigtrap") != NULL; +} + +static bool kernel_with_sleepable_spinlocks(void) +{ + const struct btf_member *member; + const struct btf_type *type; + const char *type_name; + int id; + + if (!btf__available()) + return false; + + id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT); + if (id < 0) + return false; + + // Only RT has a "lock" member for "struct spinlock" + member = __btf_type__find_member_by_name(id, "lock"); + if (member == NULL) + return false; + + // But check its type as well + type = btf__type_by_id(btf, member->type); + if (!type || !btf_is_struct(type)) + return false; + + type_name = btf__name_by_offset(btf, type->name_off); + return type_name && !strcmp(type_name, "rt_mutex_base"); } #else /* !HAVE_BPF_SKEL */ static bool attr_has_sigtrap(void) @@ -109,6 +152,15 @@ static bool attr_has_sigtrap(void) return ret; } + +static bool kernel_with_sleepable_spinlocks(void) +{ + return false; +} + +static void btf__exit(void) +{ +} #endif /* HAVE_BPF_SKEL */ static void @@ -147,7 +199,7 @@ static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier) static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) { - int ret; + int ret, expected_sigtraps; ctx.iterate_on = 3000; @@ -156,7 +208,16 @@ static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrie ret = run_test_threads(threads, barrier); TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); - TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); + expected_sigtraps = NUM_THREADS * ctx.iterate_on; + + if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) { + pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n", + expected_sigtraps, ctx.signal_count); + pr_debug("See https://lore.kernel.org/all/e368f2c848d77fbc8d259f44e2055fe469c219cf.camel@gmx.de/\n"); + return TEST_SKIP; + } else + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps); + TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on); #if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ @@ -221,6 +282,7 @@ out_restore_sigaction: sigaction(SIGTRAP, &oldact, NULL); out: pthread_barrier_destroy(&barrier); + btf__exit(); return ret; } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4d7493fa0105..290716783ac6 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -62,7 +62,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } evlist__add(evlist, evsel); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); threads = thread_map__new_by_tid(getpid()); if (!cpus || !threads) { err = -ENOMEM; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e52b031bedc5..5cab17a1942e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub goto out_err; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { pr_debug("perf_cpu_map__new failed!\n"); goto out_err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 968dddde6dda..d33d0952025c 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -70,7 +70,7 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _ * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); threads = thread_map__new_by_tid(-1); if (!cpus || !threads) { err = -ENOMEM; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b394f3ac2d66..dad3d7414142 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -207,5 +207,6 @@ DECLARE_WORKLOAD(brstack); DECLARE_WORKLOAD(datasym); extern const char *dso_to_test; +extern const char *test_objdump_path; #endif /* TESTS_H */ diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 9dee63734e66..2a842f53fbb5 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -215,7 +215,7 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su if (session_write_header(path)) goto free_path; - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); goto free_path; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 1078a93b01aa..822f893e67d5 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -112,18 +112,92 @@ static bool is_ignored_symbol(const char *name, char type) return false; } +struct test__vmlinux_matches_kallsyms_cb_args { + struct machine kallsyms; + struct map *vmlinux_map; + bool header_printed; +}; + +static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + struct dso *dso = map__dso(map); + /* + * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while + * the kernel will have the path for the vmlinux file being used, so use + * the short name, less descriptive but the same ("[kernel]" in both + * cases. + */ + struct map *pair = maps__find_by_name(args->kallsyms.kmaps, + (dso->kernel ? dso->short_name : dso->name)); + + if (pair) + map__set_priv(pair, 1); + else { + if (!args->header_printed) { + pr_info("WARN: Maps only in vmlinux:\n"); + args->header_printed = true; + } + map__fprintf(map, stderr); + } + return 0; +} + +static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + struct map *pair; + u64 mem_start = map__unmap_ip(args->vmlinux_map, map__start(map)); + u64 mem_end = map__unmap_ip(args->vmlinux_map, map__end(map)); + + pair = maps__find(args->kallsyms.kmaps, mem_start); + if (pair == NULL || map__priv(pair)) + return 0; + + if (map__start(pair) == mem_start) { + struct dso *dso = map__dso(map); + + if (!args->header_printed) { + pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); + args->header_printed = true; + } + + pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", + map__start(map), map__end(map), map__pgoff(map), dso->name); + if (mem_end != map__end(pair)) + pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, + map__start(pair), map__end(pair), map__pgoff(pair)); + pr_info(" %s\n", dso->name); + map__set_priv(pair, 1); + } + return 0; +} + +static int test__vmlinux_matches_kallsyms_cb3(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + + if (!map__priv(map)) { + if (!args->header_printed) { + pr_info("WARN: Maps only in kallsyms:\n"); + args->header_printed = true; + } + map__fprintf(map, stderr); + } + return 0; +} + static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct rb_node *nd; struct symbol *sym; - struct map *kallsyms_map, *vmlinux_map; - struct map_rb_node *rb_node; - struct machine kallsyms, vmlinux; + struct map *kallsyms_map; + struct machine vmlinux; struct maps *maps; u64 mem_start, mem_end; - bool header_printed; + struct test__vmlinux_matches_kallsyms_cb_args args; /* * Step 1: @@ -131,7 +205,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * Init the machines that will hold kernel, modules obtained from * both vmlinux + .ko files and from /proc/kallsyms split by modules. */ - machine__init(&kallsyms, "", HOST_KERNEL_ID); + machine__init(&args.kallsyms, "", HOST_KERNEL_ID); machine__init(&vmlinux, "", HOST_KERNEL_ID); maps = machine__kernel_maps(&vmlinux); @@ -143,7 +217,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * load /proc/kallsyms. Also create the modules maps from /proc/modules * and find the .ko files that match them in /lib/modules/`uname -r`/. */ - if (machine__create_kernel_maps(&kallsyms) < 0) { + if (machine__create_kernel_maps(&args.kallsyms) < 0) { pr_debug("machine__create_kernel_maps failed"); err = TEST_SKIP; goto out; @@ -160,7 +234,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) { + if (machine__load_kallsyms(&args.kallsyms, "/proc/kallsyms") <= 0) { pr_debug("machine__load_kallsyms failed"); err = TEST_SKIP; goto out; @@ -174,7 +248,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * to see if the running kernel was relocated by checking if it has the * same value in the vmlinux file we load. */ - kallsyms_map = machine__kernel_map(&kallsyms); + kallsyms_map = machine__kernel_map(&args.kallsyms); /* * Step 5: @@ -186,7 +260,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused goto out; } - vmlinux_map = machine__kernel_map(&vmlinux); + args.vmlinux_map = machine__kernel_map(&vmlinux); /* * Step 6: @@ -213,7 +287,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * in the kallsyms dso. For the ones that are in both, check its names and * end addresses too. */ - map__for_each_symbol(vmlinux_map, sym, nd) { + map__for_each_symbol(args.vmlinux_map, sym, nd) { struct symbol *pair, *first_pair; sym = rb_entry(nd, struct symbol, rb_node); @@ -221,10 +295,10 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused if (sym->start == sym->end) continue; - mem_start = map__unmap_ip(vmlinux_map, sym->start); - mem_end = map__unmap_ip(vmlinux_map, sym->end); + mem_start = map__unmap_ip(args.vmlinux_map, sym->start); + mem_end = map__unmap_ip(args.vmlinux_map, sym->end); - first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL); + first_pair = machine__find_kernel_symbol(&args.kallsyms, mem_start, NULL); pair = first_pair; if (pair && UM(pair->start) == mem_start) { @@ -253,7 +327,8 @@ next_pair: */ continue; } else { - pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL); + pair = machine__find_kernel_symbol_by_name(&args.kallsyms, + sym->name, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; @@ -267,7 +342,7 @@ next_pair: continue; } - } else if (mem_start == map__end(kallsyms.vmlinux_map)) { + } else if (mem_start == map__end(args.kallsyms.vmlinux_map)) { /* * Ignore aliases to _etext, i.e. to the end of the kernel text area, * such as __indirect_thunk_end. @@ -289,78 +364,18 @@ next_pair: if (verbose <= 0) goto out; - header_printed = false; - - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - struct dso *dso = map__dso(map); - /* - * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while - * the kernel will have the path for the vmlinux file being used, - * so use the short name, less descriptive but the same ("[kernel]" in - * both cases. - */ - struct map *pair = maps__find_by_name(kallsyms.kmaps, (dso->kernel ? - dso->short_name : - dso->name)); - if (pair) { - map__set_priv(pair, 1); - } else { - if (!header_printed) { - pr_info("WARN: Maps only in vmlinux:\n"); - header_printed = true; - } - map__fprintf(map, stderr); - } - } - - header_printed = false; - - maps__for_each_entry(maps, rb_node) { - struct map *pair, *map = rb_node->map; - - mem_start = map__unmap_ip(vmlinux_map, map__start(map)); - mem_end = map__unmap_ip(vmlinux_map, map__end(map)); + args.header_printed = false; + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb1, &args); - pair = maps__find(kallsyms.kmaps, mem_start); - if (pair == NULL || map__priv(pair)) - continue; - - if (map__start(pair) == mem_start) { - struct dso *dso = map__dso(map); - - if (!header_printed) { - pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); - header_printed = true; - } - - pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", - map__start(map), map__end(map), map__pgoff(map), dso->name); - if (mem_end != map__end(pair)) - pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, - map__start(pair), map__end(pair), map__pgoff(pair)); - pr_info(" %s\n", dso->name); - map__set_priv(pair, 1); - } - } - - header_printed = false; - - maps = machine__kernel_maps(&kallsyms); + args.header_printed = false; + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb2, &args); - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; + args.header_printed = false; + maps = machine__kernel_maps(&args.kallsyms); + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb3, &args); - if (!map__priv(map)) { - if (!header_printed) { - pr_info("WARN: Maps only in kallsyms:\n"); - header_printed = true; - } - map__fprintf(map, stderr); - } - } out: - machine__exit(&kallsyms); + machine__exit(&args.kallsyms); machine__exit(&vmlinux); return err; } diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c index af05269c2eb8..457b29f91c3e 100644 --- a/tools/perf/tests/workloads/thloop.c +++ b/tools/perf/tests/workloads/thloop.c @@ -7,7 +7,6 @@ #include "../tests.h" static volatile sig_atomic_t done; -static volatile unsigned count; /* We want to check this symbol in perf report */ noinline void test_loop(void); @@ -19,8 +18,7 @@ static void sighandler(int sig __maybe_unused) noinline void test_loop(void) { - while (!done) - __atomic_fetch_add(&count, 1, __ATOMIC_RELAXED); + while (!done); } static void *thfunc(void *arg) diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index cc09dcaa891e..7df4bf5b55a3 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -57,13 +57,13 @@ create_arch_errno_table_func() archlist="$1" default="$2" - printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n' + printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n' printf '{\n' for arch in $archlist; do printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch") - printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch") + printf '\t\treturn errno_to_name__%s;\n' $(arch_string "$arch") done - printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default") + printf '\treturn errno_to_name__%s;\n' $(arch_string "$default") printf '}\n' } @@ -76,7 +76,9 @@ EoHEADER # Create list of architectures that have a specific errno.h. archlist="" -for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | sort -r); do +for f in $toolsdir/arch/*/include/uapi/asm/errno.h; do + d=${f%/include/uapi/asm/errno.h} + arch="${d##*/}" test -f $toolsdir/arch/$arch/include/uapi/asm/errno.h && archlist="$archlist $arch" done diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 788e8f6bd90e..9feb794f5c6e 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -251,6 +251,4 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool sh void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); -const char *arch_syscalls__strerrno(const char *arch, int err); - #endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 8059342ca412..9455d9672f14 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -4,9 +4,9 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" -regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' +regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*/.*)?$' grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ - sed -r "s/$regex/\2 \1/g" | \ + sed -E "s%$regex%\2 \1%g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh index 8bc7ba62203e..670c6db298ae 100755 --- a/tools/perf/trace/beauty/socket.sh +++ b/tools/perf/trace/beauty/socket.sh @@ -18,10 +18,10 @@ grep -E $ipproto_regex ${uapi_header_dir}/in.h | \ printf "};\n\n" printf "static const char *socket_level[] = {\n" -socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?' +socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+/.*)?' grep -E $socket_level_regex ${beauty_header_dir}/socket.h | \ - sed -r "s/$socket_level_regex/\2 \1/g" | \ + sed -E "s%$socket_level_regex%\2 \1%g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ccdb2cd11fbf..ec5e21932876 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -27,7 +27,6 @@ struct annotate_browser { struct rb_node *curr_hot; struct annotation_line *selection; struct arch *arch; - struct annotation_options *opts; bool searching_backwards; char search_bf[128]; }; @@ -38,11 +37,10 @@ static inline struct annotation *browser__annotation(struct ui_browser *browser) return symbol__annotation(ms->sym); } -static bool disasm_line__filter(struct ui_browser *browser, void *entry) +static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { - struct annotation *notes = browser__annotation(browser); struct annotation_line *al = list_entry(entry, struct annotation_line, node); - return annotation_line__filter(al, notes); + return annotation_line__filter(al); } static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current) @@ -97,7 +95,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int struct annotation_write_ops ops = { .first_line = row == 0, .current_entry = is_current_entry, - .change_color = (!notes->options->hide_src_code && + .change_color = (!annotate_opts.hide_src_code && (!is_current_entry || (browser->use_navkeypressed && !browser->navkeypressed))), @@ -114,7 +112,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) ops.width += 1; - annotation_line__write(al, notes, &ops, ab->opts); + annotation_line__write(al, notes, &ops); if (ops.current_entry) ab->selection = al; @@ -128,7 +126,7 @@ static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) while (pos && pos->al.offset == -1) { pos = list_prev_entry(pos, al.node); - if (!ab->opts->hide_src_code) + if (!annotate_opts.hide_src_code) diff++; } @@ -188,14 +186,14 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) * name right after the '<' token and probably treating this like a * 'call' instruction. */ - target = notes->offsets[cursor->ops.target.offset]; + target = notes->src->offsets[cursor->ops.target.offset]; if (target == NULL) { ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n", cursor->ops.target.offset); return; } - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { from = cursor->al.idx_asm; to = target->idx_asm; } else { @@ -224,7 +222,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) int ret = ui_browser__list_head_refresh(browser); int pcnt_width = annotation__pcnt_width(notes); - if (notes->options->jump_arrows) + if (annotate_opts.jump_arrows) annotate_browser__draw_current_jump(browser); ui_browser__set_color(browser, HE_COLORSET_NORMAL); @@ -258,7 +256,7 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, browser->opts->percent_type) < 0) + if (disasm__cmp(al, l, annotate_opts.percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -270,7 +268,6 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, static void annotate_browser__set_top(struct annotate_browser *browser, struct annotation_line *pos, u32 idx) { - struct annotation *notes = browser__annotation(&browser->b); unsigned back; ui_browser__refresh_dimensions(&browser->b); @@ -280,7 +277,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser, while (browser->b.top_idx != 0 && back != 0) { pos = list_entry(pos->node.prev, struct annotation_line, node); - if (annotation_line__filter(pos, notes)) + if (annotation_line__filter(pos)) continue; --browser->b.top_idx; @@ -294,11 +291,10 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct annotation *notes = browser__annotation(&browser->b); struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node); u32 idx = pos->idx; - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) idx = pos->idx_asm; annotate_browser__set_top(browser, pos, idx); browser->curr_hot = nd; @@ -331,13 +327,13 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double percent; percent = annotation_data__percent(&pos->al.data[i], - browser->opts->percent_type); + annotate_opts.percent_type); if (max_percent < percent) max_percent = percent; } - if (max_percent < 0.01 && pos->al.ipc == 0) { + if (max_percent < 0.01 && (!pos->al.cycles || pos->al.cycles->ipc == 0)) { RB_CLEAR_NODE(&pos->al.rb_node); continue; } @@ -380,12 +376,12 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); al = list_entry(browser->b.top, struct annotation_line, node); - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { if (al->idx_asm < offset) offset = al->idx; - browser->b.nr_entries = notes->nr_entries; - notes->options->hide_src_code = false; + browser->b.nr_entries = notes->src->nr_entries; + annotate_opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx - offset; browser->b.index = al->idx; @@ -402,8 +398,8 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) if (al->idx_asm < offset) offset = al->idx_asm; - browser->b.nr_entries = notes->nr_asm_entries; - notes->options->hide_src_code = true; + browser->b.nr_entries = notes->src->nr_asm_entries; + annotate_opts.hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx_asm - offset; browser->b.index = al->idx_asm; @@ -435,7 +431,7 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); ui_browser__reset_index(browser); - browser->nr_entries = notes->nr_asm_entries; + browser->nr_entries = notes->src->nr_asm_entries; } static int sym_title(struct symbol *sym, struct map *map, char *title, @@ -483,8 +479,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, target_ms.map = ms->map; target_ms.sym = dl->ops.target.sym; annotation__unlock(notes); - symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); - sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); + symbol__tui_annotate(&target_ms, evsel, hbt); + sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type); ui_browser__show_title(&browser->b, title); return true; } @@ -500,7 +496,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows list_for_each_entry(pos, ¬es->src->source, al.node) { if (pos->al.offset == offset) return pos; - if (!annotation_line__filter(&pos->al, notes)) + if (!annotation_line__filter(&pos->al)) ++*idx; } @@ -544,7 +540,7 @@ struct annotation_line *annotate_browser__find_string(struct annotate_browser *b *idx = browser->b.index; list_for_each_entry_continue(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; ++*idx; @@ -581,7 +577,7 @@ struct annotation_line *annotate_browser__find_string_reverse(struct annotate_br *idx = browser->b.index; list_for_each_entry_continue_reverse(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; --*idx; @@ -659,7 +655,6 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) { - struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct map_symbol *ms = browser->priv; struct symbol *sym = ms->sym; char symbol_dso[SYM_TITLE_MAX_SIZE]; @@ -667,7 +662,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const if (ui_browser__show(browser, title, help) < 0) return -1; - sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type); + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type); ui_browser__gotorc_title(browser, 0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); @@ -809,7 +804,7 @@ static int annotate_browser__run(struct annotate_browser *browser, annotate_browser__show(&browser->b, title, help); continue; case 'k': - notes->options->show_linenr = !notes->options->show_linenr; + annotate_opts.show_linenr = !annotate_opts.show_linenr; continue; case 'l': annotate_browser__show_full_location (&browser->b); @@ -822,18 +817,18 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_helpline__puts(help); continue; case 'o': - notes->options->use_offset = !notes->options->use_offset; + annotate_opts.use_offset = !annotate_opts.use_offset; annotation__update_column_widths(notes); continue; case 'O': - if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) - notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + if (++annotate_opts.offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + annotate_opts.offset_level = ANNOTATION__MIN_OFFSET_LEVEL; continue; case 'j': - notes->options->jump_arrows = !notes->options->jump_arrows; + annotate_opts.jump_arrows = !annotate_opts.jump_arrows; continue; case 'J': - notes->options->show_nr_jumps = !notes->options->show_nr_jumps; + annotate_opts.show_nr_jumps = !annotate_opts.show_nr_jumps; annotation__update_column_widths(notes); continue; case '/': @@ -860,7 +855,7 @@ show_help: browser->b.height, browser->b.index, browser->b.top_idx, - notes->nr_asm_entries); + notes->src->nr_asm_entries); } continue; case K_ENTER: @@ -884,7 +879,7 @@ show_sup_ins: continue; } case 'P': - map_symbol__annotation_dump(ms, evsel, browser->opts); + map_symbol__annotation_dump(ms, evsel); continue; case 't': if (symbol_conf.show_total_period) { @@ -897,15 +892,15 @@ show_sup_ins: annotation__update_column_widths(notes); continue; case 'c': - if (notes->options->show_minmax_cycle) - notes->options->show_minmax_cycle = false; + if (annotate_opts.show_minmax_cycle) + annotate_opts.show_minmax_cycle = false; else - notes->options->show_minmax_cycle = true; + annotate_opts.show_minmax_cycle = true; annotation__update_column_widths(notes); continue; case 'p': case 'b': - switch_percent_type(browser->opts, key == 'b'); + switch_percent_type(&annotate_opts, key == 'b'); hists__scnprintf_title(hists, title, sizeof(title)); annotate_browser__show(&browser->b, title, help); continue; @@ -932,26 +927,24 @@ out: } int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { - return symbol__tui_annotate(ms, evsel, hbt, opts); + return symbol__tui_annotate(ms, evsel, hbt); } int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); - return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); @@ -965,7 +958,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .priv = ms, .use_navkeypressed = true, }, - .opts = opts, }; struct dso *dso; int ret = -1, err; @@ -979,7 +971,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, return -1; if (not_annotated) { - err = symbol__annotate2(ms, evsel, opts, &browser.arch); + err = symbol__annotate2(ms, evsel, &browser.arch); if (err) { char msg[BUFSIZ]; dso->annotate_warned = true; @@ -991,12 +983,12 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ui_helpline__push("Press ESC to exit"); - browser.b.width = notes->max_line_len; - browser.b.nr_entries = notes->nr_entries; + browser.b.width = notes->src->max_line_len; + browser.b.nr_entries = notes->src->nr_entries; browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) ui_browser__init_asm_mode(&browser.b); ret = annotate_browser__run(&browser, evsel, hbt); @@ -1006,6 +998,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, out_free_offsets: if(not_annotated) - zfree(¬es->offsets); + zfree(¬es->src->offsets); return ret; } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f4812b226818..0c02b3a8e121 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2250,8 +2250,7 @@ struct hist_browser *hist_browser__new(struct hists *hists) static struct hist_browser * perf_evsel_browser__new(struct evsel *evsel, struct hist_browser_timer *hbt, - struct perf_env *env, - struct annotation_options *annotation_opts) + struct perf_env *env) { struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); @@ -2259,7 +2258,6 @@ perf_evsel_browser__new(struct evsel *evsel, browser->hbt = hbt; browser->env = env; browser->title = hists_browser__scnprintf_title; - browser->annotation_opts = annotation_opts; } return browser; } @@ -2432,8 +2430,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!browser->annotation_opts->objdump_path && - perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) + if (!annotate_opts.objdump_path && + perf_env__lookup_objdump(browser->env, &annotate_opts.objdump_path)) return 0; notes = symbol__annotation(act->ms.sym); @@ -2445,8 +2443,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) else evsel = hists_to_evsel(browser->hists); - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, - browser->annotation_opts); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); he = hist_browser__selected_entry(browser); /* * offer option to annotate the other branch source or target @@ -2943,11 +2940,10 @@ next: static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + struct perf_env *env, bool warn_lost_event) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); struct branch_info *bi = NULL; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -3004,6 +3000,7 @@ static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *h /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); if (min_pcnt) browser->min_pcnt = min_pcnt; @@ -3398,7 +3395,6 @@ out: struct evsel_menu { struct ui_browser b; struct evsel *selection; - struct annotation_options *annotation_opts; bool lost_events, lost_events_warned; float min_pcnt; struct perf_env *env; @@ -3499,8 +3495,7 @@ browse_hists: hbt->timer(hbt->arg); key = evsel__hists_browse(pos, nr_events, help, true, hbt, menu->min_pcnt, menu->env, - warn_lost_event, - menu->annotation_opts); + warn_lost_event); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3557,7 +3552,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event, struct annotation_options *annotation_opts) + bool warn_lost_event) { struct evsel *pos; struct evsel_menu menu = { @@ -3572,7 +3567,6 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con }, .min_pcnt = min_pcnt, .env = env, - .annotation_opts = annotation_opts, }; ui_helpline__push("Press ESC to exit"); @@ -3607,8 +3601,7 @@ static bool evlist__single_entry(struct evlist *evlist) } int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + float min_pcnt, struct perf_env *env, bool warn_lost_event) { int nr_entries = evlist->core.nr_entries; @@ -3617,7 +3610,7 @@ single_entry: { struct evsel *first = evlist__first(evlist); return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env, warn_lost_event, annotation_opts); + env, warn_lost_event); } } @@ -3635,7 +3628,7 @@ single_entry: { } return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, - warn_lost_event, annotation_opts); + warn_lost_event); } static int block_hists_browser__title(struct hist_browser *browser, char *bf, @@ -3654,8 +3647,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, } int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts) + float min_percent, struct perf_env *env) { struct hists *hists = &bh->block_hists; struct hist_browser *browser; @@ -3672,11 +3664,11 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, browser->title = block_hists_browser__title; browser->min_pcnt = min_percent; browser->env = env; - browser->annotation_opts = annotation_opts; /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); memset(&action, 0, sizeof(action)); diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 1e938d9ffa5e..de46f6c56b0e 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -4,7 +4,6 @@ #include "ui/browser.h" -struct annotation_options; struct evsel; struct hist_browser { @@ -15,7 +14,6 @@ struct hist_browser { struct hist_browser_timer *hbt; struct pstack *pstack; struct perf_env *env; - struct annotation_options *annotation_opts; struct evsel *block_evsel; int print_seq; bool show_dso; diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 47d2c7a8cbe1..50d45054ed6c 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -166,6 +166,7 @@ void run_script(char *cmd) printf("\033[c\033[H\033[J"); fflush(stdout); SLang_init_tty(0, 0, 0); + SLtty_set_suspend_state(true); SLsmg_refresh(); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 2effac77ca8c..394861245fd3 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -162,7 +162,6 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms, } static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt) { struct dso *dso = map__dso(ms->map); @@ -176,7 +175,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, if (dso->annotate_warned) return -1; - err = symbol__annotate(ms, evsel, options, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; dso->annotate_warned = true; @@ -244,10 +243,9 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt) { - return symbol__gtk_annotate(&he->ms, evsel, options, hbt); + return symbol__gtk_annotate(&he->ms, evsel, hbt); } void perf_gtk__show_annotations(void) diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 1e84dceb5267..a2b497f03fd6 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -56,13 +56,11 @@ struct evsel; struct evlist; struct hist_entry; struct hist_browser_timer; -struct annotation_options; int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt); int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt); void perf_gtk__show_annotations(void); diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 605d9e175ea7..16c6eff4d241 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -2,12 +2,14 @@ #include <signal.h> #include <stdbool.h> #include <stdlib.h> +#include <termios.h> #include <unistd.h> #include <linux/kernel.h> #ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #endif +#include "../../util/color.h" #include "../../util/debug.h" #include "../browser.h" #include "../helpline.h" @@ -121,6 +123,23 @@ static void ui__signal(int sig) exit(0); } +static void ui__sigcont(int sig) +{ + static struct termios tty; + + if (sig == SIGTSTP) { + while (tcgetattr(SLang_TT_Read_FD, &tty) == -1 && errno == EINTR) + ; + while (write(SLang_TT_Read_FD, PERF_COLOR_RESET, sizeof(PERF_COLOR_RESET) - 1) == -1 && errno == EINTR) + ; + raise(SIGSTOP); + } else { + while (tcsetattr(SLang_TT_Read_FD, TCSADRAIN, &tty) == -1 && errno == EINTR) + ; + raise(SIGWINCH); + } +} + int ui__init(void) { int err; @@ -135,6 +154,7 @@ int ui__init(void) err = SLang_init_tty(-1, 0, 0); if (err < 0) goto out; + SLtty_set_suspend_state(true); err = SLkp_init(); if (err < 0) { @@ -149,6 +169,8 @@ int ui__init(void) signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); + signal(SIGTSTP, ui__sigcont); + signal(SIGCONT, ui__sigcont); perf_error__register(&perf_tui_eops); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 988473bf907a..8027f450fa3e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -195,6 +195,8 @@ endif perf-$(CONFIG_DWARF) += probe-finder.o perf-$(CONFIG_DWARF) += dwarf-aux.o perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += debuginfo.o +perf-$(CONFIG_DWARF) += annotate-data.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c new file mode 100644 index 000000000000..f22b4f18271c --- /dev/null +++ b/tools/perf/util/annotate-data.c @@ -0,0 +1,405 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Convert sample address to data type using DWARF debug info. + * + * Written by Namhyung Kim <namhyung@kernel.org> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "annotate-data.h" +#include "debuginfo.h" +#include "debug.h" +#include "dso.h" +#include "evsel.h" +#include "evlist.h" +#include "map.h" +#include "map_symbol.h" +#include "strbuf.h" +#include "symbol.h" +#include "symbol_conf.h" + +/* + * Compare type name and size to maintain them in a tree. + * I'm not sure if DWARF would have information of a single type in many + * different places (compilation units). If not, it could compare the + * offset of the type entry in the .debug_info section. + */ +static int data_type_cmp(const void *_key, const struct rb_node *node) +{ + const struct annotated_data_type *key = _key; + struct annotated_data_type *type; + + type = rb_entry(node, struct annotated_data_type, node); + + if (key->self.size != type->self.size) + return key->self.size - type->self.size; + return strcmp(key->self.type_name, type->self.type_name); +} + +static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) +{ + struct annotated_data_type *a, *b; + + a = rb_entry(node_a, struct annotated_data_type, node); + b = rb_entry(node_b, struct annotated_data_type, node); + + if (a->self.size != b->self.size) + return a->self.size < b->self.size; + return strcmp(a->self.type_name, b->self.type_name) < 0; +} + +/* Recursively add new members for struct/union */ +static int __add_member_cb(Dwarf_Die *die, void *arg) +{ + struct annotated_member *parent = arg; + struct annotated_member *member; + Dwarf_Die member_type, die_mem; + Dwarf_Word size, loc; + Dwarf_Attribute attr; + struct strbuf sb; + int tag; + + if (dwarf_tag(die) != DW_TAG_member) + return DIE_FIND_CB_SIBLING; + + member = zalloc(sizeof(*member)); + if (member == NULL) + return DIE_FIND_CB_END; + + strbuf_init(&sb, 32); + die_get_typename(die, &sb); + + die_get_real_type(die, &member_type); + if (dwarf_aggregate_size(&member_type, &size) < 0) + size = 0; + + if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) + loc = 0; + else + dwarf_formudata(&attr, &loc); + + member->type_name = strbuf_detach(&sb, NULL); + /* member->var_name can be NULL */ + if (dwarf_diename(die)) + member->var_name = strdup(dwarf_diename(die)); + member->size = size; + member->offset = loc + parent->offset; + INIT_LIST_HEAD(&member->children); + list_add_tail(&member->node, &parent->children); + + tag = dwarf_tag(&member_type); + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + die_find_child(&member_type, __add_member_cb, member, &die_mem); + break; + default: + break; + } + return DIE_FIND_CB_SIBLING; +} + +static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) +{ + Dwarf_Die die_mem; + + die_find_child(type, __add_member_cb, &parent->self, &die_mem); +} + +static void delete_members(struct annotated_member *member) +{ + struct annotated_member *child, *tmp; + + list_for_each_entry_safe(child, tmp, &member->children, node) { + list_del(&child->node); + delete_members(child); + free(child->type_name); + free(child->var_name); + free(child); + } +} + +static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, + Dwarf_Die *type_die) +{ + struct annotated_data_type *result = NULL; + struct annotated_data_type key; + struct rb_node *node; + struct strbuf sb; + char *type_name; + Dwarf_Word size; + + strbuf_init(&sb, 32); + if (die_get_typename_from_type(type_die, &sb) < 0) + strbuf_add(&sb, "(unknown type)", 14); + type_name = strbuf_detach(&sb, NULL); + dwarf_aggregate_size(type_die, &size); + + /* Check existing nodes in dso->data_types tree */ + key.self.type_name = type_name; + key.self.size = size; + node = rb_find(&key, &dso->data_types, data_type_cmp); + if (node) { + result = rb_entry(node, struct annotated_data_type, node); + free(type_name); + return result; + } + + /* If not, add a new one */ + result = zalloc(sizeof(*result)); + if (result == NULL) { + free(type_name); + return NULL; + } + + result->self.type_name = type_name; + result->self.size = size; + INIT_LIST_HEAD(&result->self.children); + + if (symbol_conf.annotate_data_member) + add_member_types(result, type_die); + + rb_add(&result->node, &dso->data_types, data_type_less); + return result; +} + +static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) +{ + Dwarf_Off off, next_off; + size_t header_size; + + if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) + return cu_die; + + /* + * There are some kernels don't have full aranges and contain only a few + * aranges entries. Fallback to iterate all CU entries in .debug_info + * in case it's missing. + */ + off = 0; + while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, + NULL, NULL, NULL) == 0) { + if (dwarf_offdie(di->dbg, off + header_size, cu_die) && + dwarf_haspc(cu_die, pc)) + return true; + + off = next_off; + } + return false; +} + +/* The type info will be saved in @type_die */ +static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset) +{ + Dwarf_Word size; + + /* Get the type of the variable */ + if (die_get_real_type(var_die, type_die) == NULL) { + pr_debug("variable has no type\n"); + ann_data_stat.no_typeinfo++; + return -1; + } + + /* + * It expects a pointer type for a memory access. + * Convert to a real type it points to. + */ + if (dwarf_tag(type_die) != DW_TAG_pointer_type || + die_get_real_type(type_die, type_die) == NULL) { + pr_debug("no pointer or no type\n"); + ann_data_stat.no_typeinfo++; + return -1; + } + + /* Get the size of the actual type */ + if (dwarf_aggregate_size(type_die, &size) < 0) { + pr_debug("type size is unknown\n"); + ann_data_stat.invalid_size++; + return -1; + } + + /* Minimal sanity check */ + if ((unsigned)offset >= size) { + pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); + ann_data_stat.bad_offset++; + return -1; + } + + return 0; +} + +/* The result will be saved in @type_die */ +static int find_data_type_die(struct debuginfo *di, u64 pc, + int reg, int offset, Dwarf_Die *type_die) +{ + Dwarf_Die cu_die, var_die; + Dwarf_Die *scopes = NULL; + int ret = -1; + int i, nr_scopes; + + /* Get a compile_unit for this address */ + if (!find_cu_die(di, pc, &cu_die)) { + pr_debug("cannot find CU for address %" PRIx64 "\n", pc); + ann_data_stat.no_cuinfo++; + return -1; + } + + /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ + nr_scopes = die_get_scopes(&cu_die, pc, &scopes); + + /* Search from the inner-most scope to the outer */ + for (i = nr_scopes - 1; i >= 0; i--) { + /* Look up variables/parameters in this scope */ + if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die)) + continue; + + /* Found a variable, see if it's correct */ + ret = check_variable(&var_die, type_die, offset); + goto out; + } + if (ret < 0) + ann_data_stat.no_var++; + +out: + free(scopes); + return ret; +} + +/** + * find_data_type - Return a data type at the location + * @ms: map and symbol at the location + * @ip: instruction address of the memory access + * @reg: register that holds the base address + * @offset: offset from the base address + * + * This functions searches the debug information of the binary to get the data + * type it accesses. The exact location is expressed by (ip, reg, offset). + * It return %NULL if not found. + */ +struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, + int reg, int offset) +{ + struct annotated_data_type *result = NULL; + struct dso *dso = map__dso(ms->map); + struct debuginfo *di; + Dwarf_Die type_die; + u64 pc; + + di = debuginfo__new(dso->long_name); + if (di == NULL) { + pr_debug("cannot get the debug info\n"); + return NULL; + } + + /* + * IP is a relative instruction address from the start of the map, as + * it can be randomized/relocated, it needs to translate to PC which is + * a file address for DWARF processing. + */ + pc = map__rip_2objdump(ms->map, ip); + if (find_data_type_die(di, pc, reg, offset, &type_die) < 0) + goto out; + + result = dso__findnew_data_type(dso, &type_die); + +out: + debuginfo__delete(di); + return result; +} + +static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) +{ + int i; + size_t sz = sizeof(struct type_hist); + + sz += sizeof(struct type_hist_entry) * adt->self.size; + + /* Allocate a table of pointers for each event */ + adt->nr_histograms = nr_entries; + adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); + if (adt->histograms == NULL) + return -ENOMEM; + + /* + * Each histogram is allocated for the whole size of the type. + * TODO: Probably we can move the histogram to members. + */ + for (i = 0; i < nr_entries; i++) { + adt->histograms[i] = zalloc(sz); + if (adt->histograms[i] == NULL) + goto err; + } + return 0; + +err: + while (--i >= 0) + free(adt->histograms[i]); + free(adt->histograms); + return -ENOMEM; +} + +static void delete_data_type_histograms(struct annotated_data_type *adt) +{ + for (int i = 0; i < adt->nr_histograms; i++) + free(adt->histograms[i]); + free(adt->histograms); +} + +void annotated_data_type__tree_delete(struct rb_root *root) +{ + struct annotated_data_type *pos; + + while (!RB_EMPTY_ROOT(root)) { + struct rb_node *node = rb_first(root); + + rb_erase(node, root); + pos = rb_entry(node, struct annotated_data_type, node); + delete_members(&pos->self); + delete_data_type_histograms(pos); + free(pos->self.type_name); + free(pos); + } +} + +/** + * annotated_data_type__update_samples - Update histogram + * @adt: Data type to update + * @evsel: Event to update + * @offset: Offset in the type + * @nr_samples: Number of samples at this offset + * @period: Event count at this offset + * + * This function updates type histogram at @ofs for @evsel. Samples are + * aggregated before calling this function so it can be called with more + * than one samples at a certain offset. + */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period) +{ + struct type_hist *h; + + if (adt == NULL) + return 0; + + if (adt->histograms == NULL) { + int nr = evsel->evlist->core.nr_entries; + + if (alloc_data_type_histograms(adt, nr) < 0) + return -1; + } + + if (offset < 0 || offset >= adt->self.size) + return -1; + + h = adt->histograms[evsel->core.idx]; + + h->nr_samples += nr_samples; + h->addr[offset].nr_samples += nr_samples; + h->period += period; + h->addr[offset].period += period; + return 0; +} diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h new file mode 100644 index 000000000000..8e73096c01d1 --- /dev/null +++ b/tools/perf/util/annotate-data.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PERF_ANNOTATE_DATA_H +#define _PERF_ANNOTATE_DATA_H + +#include <errno.h> +#include <linux/compiler.h> +#include <linux/rbtree.h> +#include <linux/types.h> + +struct evsel; +struct map_symbol; + +/** + * struct annotated_member - Type of member field + * @node: List entry in the parent list + * @children: List head for child nodes + * @type_name: Name of the member type + * @var_name: Name of the member variable + * @offset: Offset from the outer data type + * @size: Size of the member field + * + * This represents a member type in a data type. + */ +struct annotated_member { + struct list_head node; + struct list_head children; + char *type_name; + char *var_name; + int offset; + int size; +}; + +/** + * struct type_hist_entry - Histogram entry per offset + * @nr_samples: Number of samples + * @period: Count of event + */ +struct type_hist_entry { + int nr_samples; + u64 period; +}; + +/** + * struct type_hist - Type histogram for each event + * @nr_samples: Total number of samples in this data type + * @period: Total count of the event in this data type + * @offset: Array of histogram entry + */ +struct type_hist { + u64 nr_samples; + u64 period; + struct type_hist_entry addr[]; +}; + +/** + * struct annotated_data_type - Data type to profile + * @node: RB-tree node for dso->type_tree + * @self: Actual type information + * @nr_histogram: Number of histogram entries + * @histograms: An array of pointers to histograms + * + * This represents a data type accessed by samples in the profile data. + */ +struct annotated_data_type { + struct rb_node node; + struct annotated_member self; + int nr_histograms; + struct type_hist **histograms; +}; + +extern struct annotated_data_type unknown_type; + +/** + * struct annotated_data_stat - Debug statistics + * @total: Total number of entry + * @no_sym: No symbol or map found + * @no_insn: Failed to get disasm line + * @no_insn_ops: The instruction has no operands + * @no_mem_ops: The instruction has no memory operands + * @no_reg: Failed to extract a register from the operand + * @no_dbginfo: The binary has no debug information + * @no_cuinfo: Failed to find a compile_unit + * @no_var: Failed to find a matching variable + * @no_typeinfo: Failed to get a type info for the variable + * @invalid_size: Failed to get a size info of the type + * @bad_offset: The access offset is out of the type + */ +struct annotated_data_stat { + int total; + int no_sym; + int no_insn; + int no_insn_ops; + int no_mem_ops; + int no_reg; + int no_dbginfo; + int no_cuinfo; + int no_var; + int no_typeinfo; + int invalid_size; + int bad_offset; +}; +extern struct annotated_data_stat ann_data_stat; + +#ifdef HAVE_DWARF_SUPPORT + +/* Returns data type at the location (ip, reg, offset) */ +struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, + int reg, int offset); + +/* Update type access histogram at the given offset */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period); + +/* Release all data type information in the tree */ +void annotated_data_type__tree_delete(struct rb_root *root); + +#else /* HAVE_DWARF_SUPPORT */ + +static inline struct annotated_data_type * +find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, + int reg __maybe_unused, int offset __maybe_unused) +{ + return NULL; +} + +static inline int +annotated_data_type__update_samples(struct annotated_data_type *adt __maybe_unused, + struct evsel *evsel __maybe_unused, + int offset __maybe_unused, + int nr_samples __maybe_unused, + u64 period __maybe_unused) +{ + return -1; +} + +static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe_unused) +{ +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#endif /* _PERF_ANNOTATE_DATA_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 82956adf9963..9b70ab110ce7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -25,12 +25,14 @@ #include "units.h" #include "debug.h" #include "annotate.h" +#include "annotate-data.h" #include "evsel.h" #include "evlist.h" #include "bpf-event.h" #include "bpf-utils.h" #include "block-range.h" #include "string2.h" +#include "dwarf-regs.h" #include "util/event.h" #include "util/sharded_mutex.h" #include "arch/common.h" @@ -57,6 +59,9 @@ #include <linux/ctype.h> +/* global annotation options */ +struct annotation_options annotate_opts; + static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); @@ -85,6 +90,8 @@ struct arch { struct { char comment_char; char skip_functions_char; + char register_char; + char memory_ref_char; } objdump; }; @@ -96,6 +103,10 @@ static struct ins_ops nop_ops; static struct ins_ops lock_ops; static struct ins_ops ret_ops; +/* Data type collection debug statistics */ +struct annotated_data_stat ann_data_stat; +LIST_HEAD(ann_insn_stat); + static int arch__grow_instructions(struct arch *arch) { struct ins *new_instructions; @@ -188,6 +199,8 @@ static struct arch architectures[] = { .insn_suffix = "bwlq", .objdump = { .comment_char = '#', + .register_char = '%', + .memory_ref_char = '(', }, }, { @@ -340,10 +353,10 @@ bool ins__is_call(const struct ins *ins) */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { - if (ops->raw_comment && c > ops->raw_comment) + if (ops->jump.raw_comment && c > ops->jump.raw_comment) return NULL; - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) return NULL; return c; @@ -359,8 +372,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s const char *c = strchr(ops->raw, ','); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); c = validate_comma(c, ops); @@ -462,7 +475,16 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, ops->target.offset); } +static void jump__delete(struct ins_operands *ops __maybe_unused) +{ + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the + * raw string, don't free them. + */ +} + static struct ins_ops jump_ops = { + .free = jump__delete, .parse = jump__parse, .scnprintf = jump__scnprintf, }; @@ -557,6 +579,34 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; +/* + * Check if the operand has more than one registers like x86 SIB addressing: + * 0x1234(%rax, %rbx, 8) + * + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check + * the input string after 'memory_ref_char' if exists. + */ +static bool check_multi_regs(struct arch *arch, const char *op) +{ + int count = 0; + + if (arch->objdump.register_char == 0) + return false; + + if (arch->objdump.memory_ref_char) { + op = strchr(op, arch->objdump.memory_ref_char); + if (op == NULL) + return false; + } + + while ((op = strchr(op, arch->objdump.register_char)) != NULL) { + count++; + op++; + } + + return count > 1; +} + static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -584,6 +634,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->source.raw == NULL) return -1; + ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); @@ -604,6 +656,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->target.raw == NULL) goto out_free_source; + ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); + if (comment == NULL) return 0; @@ -795,6 +849,11 @@ static struct arch *arch__find(const char *name) return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } +bool arch__is(struct arch *arch, const char *name) +{ + return !strcmp(arch->name, name); +} + static struct annotated_source *annotated_source__new(void) { struct annotated_source *src = zalloc(sizeof(*src)); @@ -810,7 +869,6 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src if (src == NULL) return; zfree(&src->histograms); - zfree(&src->cycles_hist); free(src); } @@ -845,18 +903,6 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return src->histograms ? 0 : -1; } -/* The cycles histogram is lazily allocated. */ -static int symbol__alloc_hist_cycles(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); - - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); - if (notes->src->cycles_hist == NULL) - return -1; - return 0; -} - void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -865,9 +911,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); - if (notes->src->cycles_hist) - memset(notes->src->cycles_hist, 0, - symbol__size(sym) * sizeof(struct cyc_hist)); + } + if (notes->branch && notes->branch->cycles_hist) { + memset(notes->branch->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); } annotation__unlock(notes); } @@ -958,23 +1005,33 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, return 0; } +struct annotated_branch *annotation__get_branch(struct annotation *notes) +{ + if (notes == NULL) + return NULL; + + if (notes->branch == NULL) + notes->branch = zalloc(sizeof(*notes->branch)); + + return notes->branch; +} + static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); + struct annotated_branch *branch; - if (notes->src == NULL) { - notes->src = annotated_source__new(); - if (notes->src == NULL) - return NULL; - goto alloc_cycles_hist; - } + branch = annotation__get_branch(notes); + if (branch == NULL) + return NULL; + + if (branch->cycles_hist == NULL) { + const size_t size = symbol__size(sym); - if (!notes->src->cycles_hist) { -alloc_cycles_hist: - symbol__alloc_hist_cycles(sym); + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); } - return notes->src->cycles_hist; + return branch->cycles_hist; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -1077,12 +1134,20 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 u64 offset; for (offset = start; offset <= end; offset++) { - if (notes->offsets[offset]) + if (notes->src->offsets[offset]) n_insn++; } return n_insn; } +static void annotated_branch__delete(struct annotated_branch *branch) +{ + if (branch) { + zfree(&branch->cycles_hist); + free(branch); + } +} + static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; @@ -1091,6 +1156,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 n_insn = annotation__count_insn(notes, start, end); if (n_insn && ch->num && ch->cycles) { + struct annotated_branch *branch; float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ @@ -1098,54 +1164,76 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 return; for (offset = start; offset <= end; offset++) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; - if (al && al->ipc == 0.0) { - al->ipc = ipc; + if (al && al->cycles && al->cycles->ipc == 0.0) { + al->cycles->ipc = ipc; cover_insn++; } } - if (cover_insn) { - notes->hit_cycles += ch->cycles; - notes->hit_insn += n_insn * ch->num; - notes->cover_insn += cover_insn; + branch = annotation__get_branch(notes); + if (cover_insn && branch) { + branch->hit_cycles += ch->cycles; + branch->hit_insn += n_insn * ch->num; + branch->cover_insn += cover_insn; } } } -void annotation__compute_ipc(struct annotation *notes, size_t size) +static int annotation__compute_ipc(struct annotation *notes, size_t size) { + int err = 0; s64 offset; - if (!notes->src || !notes->src->cycles_hist) - return; + if (!notes->branch || !notes->branch->cycles_hist) + return 0; - notes->total_insn = annotation__count_insn(notes, 0, size - 1); - notes->hit_cycles = 0; - notes->hit_insn = 0; - notes->cover_insn = 0; + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles = 0; + notes->branch->hit_insn = 0; + notes->branch->cover_insn = 0; annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; - ch = ¬es->src->cycles_hist[offset]; + ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al; + al = notes->src->offsets[offset]; + if (al && al->cycles == NULL) { + al->cycles = zalloc(sizeof(*al->cycles)); + if (al->cycles == NULL) { + err = ENOMEM; + break; + } + } if (ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); - al = notes->offsets[offset]; if (al && ch->num_aggr) { - al->cycles = ch->cycles_aggr / ch->num_aggr; - al->cycles_max = ch->cycles_max; - al->cycles_min = ch->cycles_min; + al->cycles->avg = ch->cycles_aggr / ch->num_aggr; + al->cycles->max = ch->cycles_max; + al->cycles->min = ch->cycles_min; + } + } + } + + if (err) { + while (++offset < (s64)size) { + struct cyc_hist *ch = ¬es->branch->cycles_hist[offset]; + + if (ch && ch->cycles) { + struct annotation_line *al = notes->src->offsets[offset]; + if (al) + zfree(&al->cycles); } - notes->have_cycles = true; } } + annotation__unlock(notes); + return 0; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1225,6 +1313,7 @@ static void annotation_line__exit(struct annotation_line *al) { zfree_srcline(&al->path); zfree(&al->line); + zfree(&al->cycles); } static size_t disasm_line_size(int nr) @@ -1299,6 +1388,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); + annotated_branch__delete(notes->branch); } static struct sharded_mutex *sharded_mutex; @@ -1817,7 +1907,6 @@ static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) { struct annotation *notes = symbol__annotation(sym); - struct annotation_options *opts = args->options; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; @@ -1927,7 +2016,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, prev_buf_size = buf_size; fflush(s); - if (!opts->hide_src_code && srcline) { + if (!annotate_opts.hide_src_code && srcline) { args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; @@ -2050,7 +2139,7 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { - struct annotation_options *opts = args->options; + struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; @@ -2113,12 +2202,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), + opts->show_linenr ? "-l" : "", opts->show_asm_raw ? "" : "--no-show-raw-insn", opts->annotate_src ? "-S" : "", opts->prefix ? "--prefix " : "", @@ -2299,15 +2389,8 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) +static int evsel__get_arch(struct evsel *evsel, struct arch **parch) { - struct symbol *sym = ms->sym; - struct annotation *notes = symbol__annotation(sym); - struct annotate_args args = { - .evsel = evsel, - .options = options, - }; struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); struct arch *arch; @@ -2316,25 +2399,45 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, if (!arch_name) return errno; - args.arch = arch = arch__find(arch_name); + *parch = arch = arch__find(arch_name); if (arch == NULL) { pr_err("%s: unsupported arch %s\n", __func__, arch_name); return ENOTSUP; } - if (parch) - *parch = arch; - if (arch->init) { err = arch->init(arch, env ? env->cpuid : NULL); if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + pr_err("%s: failed to initialize %s arch priv area\n", + __func__, arch->name); return err; } } + return 0; +} +int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, + struct arch **parch) +{ + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct annotate_args args = { + .evsel = evsel, + .options = &annotate_opts, + }; + struct arch *arch = NULL; + int err; + + err = evsel__get_arch(evsel, &arch); + if (err < 0) + return err; + + if (parch) + *parch = arch; + + args.arch = arch; args.ms = *ms; - if (notes->options && notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2342,12 +2445,12 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return symbol__disassemble(sym, &args); } -static void insert_source_line(struct rb_root *root, struct annotation_line *al, - struct annotation_options *opts) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; + unsigned int percent_type = annotate_opts.percent_type; int i, ret; while (*p != NULL) { @@ -2358,7 +2461,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, if (ret == 0) { for (i = 0; i < al->data_nr; i++) { iter->data[i].percent_sum += annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } return; } @@ -2371,7 +2474,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, for (i = 0; i < al->data_nr; i++) { al->data[i].percent_sum = annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } rb_link_node(&al->rb_node, parent, p); @@ -2493,8 +2596,7 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -2505,6 +2607,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); struct annotation_line *pos, *queue = NULL; + struct annotation_options *opts = &annotate_opts; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; @@ -2633,8 +2736,7 @@ static void FILE__write_graph(void *fp, int graph) fputs(s, fp); } -static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, - struct annotation_options *opts) +static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) { struct annotation *notes = symbol__annotation(sym); struct annotation_write_ops wops = { @@ -2649,9 +2751,9 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, struct annotation_line *al; list_for_each_entry(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; - annotation_line__write(al, notes, &wops, opts); + annotation_line__write(al, notes, &wops); fputc('\n', fp); wops.first_line = false; } @@ -2659,8 +2761,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, return 0; } -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) { const char *ev_name = evsel__name(evsel); char buf[1024]; @@ -2682,7 +2783,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, fprintf(fp, "%s() %s\nEvent: %s\n\n", ms->sym->name, map__dso(ms->map)->long_name, ev_name); - symbol__annotate_fprintf2(ms->sym, fp, opts); + symbol__annotate_fprintf2(ms->sym, fp); fclose(fp); err = 0; @@ -2769,7 +2870,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) return; for (offset = 0; offset < size; ++offset) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; struct disasm_line *dl; dl = disasm_line(al); @@ -2777,7 +2878,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (!disasm_line__is_valid_local_jump(dl, sym)) continue; - al = notes->offsets[dl->ops.target.offset]; + al = notes->src->offsets[dl->ops.target.offset]; /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we @@ -2794,19 +2895,20 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__set_offsets(struct annotation *notes, s64 size) { struct annotation_line *al; + struct annotated_source *src = notes->src; - notes->max_line_len = 0; - notes->nr_entries = 0; - notes->nr_asm_entries = 0; + src->max_line_len = 0; + src->nr_entries = 0; + src->nr_asm_entries = 0; - list_for_each_entry(al, ¬es->src->source, node) { + list_for_each_entry(al, &src->source, node) { size_t line_len = strlen(al->line); - if (notes->max_line_len < line_len) - notes->max_line_len = line_len; - al->idx = notes->nr_entries++; + if (src->max_line_len < line_len) + src->max_line_len = line_len; + al->idx = src->nr_entries++; if (al->offset != -1) { - al->idx_asm = notes->nr_asm_entries++; + al->idx_asm = src->nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly * routines that comes with labels in the same column @@ -2815,7 +2917,7 @@ void annotation__set_offsets(struct annotation *notes, s64 size) * E.g. copy_user_generic_unrolled */ if (al->offset < size) - notes->offsets[al->offset] = al; + notes->src->offsets[al->offset] = al; } else al->idx_asm = -1; } @@ -2858,24 +2960,24 @@ void annotation__init_column_widths(struct annotation *notes, struct symbol *sym void annotation__update_column_widths(struct annotation *notes) { - if (notes->options->use_offset) + if (annotate_opts.use_offset) notes->widths.target = notes->widths.min_addr; - else if (notes->options->full_addr) + else if (annotate_opts.full_addr) notes->widths.target = BITS_PER_LONG / 4; else notes->widths.target = notes->widths.max_addr; notes->widths.addr = notes->widths.target; - if (notes->options->show_nr_jumps) + if (annotate_opts.show_nr_jumps) notes->widths.addr += notes->widths.jumps + 1; } void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) { - notes->options->full_addr = !notes->options->full_addr; + annotate_opts.full_addr = !annotate_opts.full_addr; - if (notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2884,8 +2986,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m } static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root, - struct annotation_options *opts) + struct rb_root *root) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; @@ -2898,7 +2999,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, double percent; percent = annotation_data__percent(&al->data[i], - opts->percent_type); + annotate_opts.percent_type); if (percent > percent_max) percent_max = percent; @@ -2909,22 +3010,20 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, false, true, notes->start + al->offset); - insert_source_line(&tmp_root, al, opts); + insert_source_line(&tmp_root, al); } resort_source_line(root, &tmp_root); } -static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root, - struct annotation_options *opts) +static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root, opts); + annotation__calc_lines(notes, ms->map, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; @@ -2933,7 +3032,7 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, char buf[1024]; int err; - err = symbol__annotate2(ms, evsel, opts, NULL); + err = symbol__annotate2(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -2943,31 +3042,31 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, return -1; } - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } hists__scnprintf_title(hists, buf, sizeof(buf)); fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", - buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); - symbol__annotate_fprintf2(sym, stdout, opts); + buf, percent_type_str(annotate_opts.percent_type), sym->name, + dso->long_name); + symbol__annotate_fprintf2(sym, stdout); annotated_source__purge(symbol__annotation(sym)->src); return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; int err; - err = symbol__annotate(ms, evsel, opts, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -2979,13 +3078,13 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, symbol__calc_percent(sym, evsel); - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } - symbol__annotate_printf(ms, evsel, opts); + symbol__annotate_printf(ms, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -3046,19 +3145,20 @@ call_like: obj__printf(obj, " "); } - disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name); + disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name); } static void ipc_coverage_string(char *bf, int size, struct annotation *notes) { double ipc = 0.0, coverage = 0.0; + struct annotated_branch *branch = annotation__get_branch(notes); - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", @@ -3083,8 +3183,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int printed; if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles) { - if (al->ipc == 0.0 && al->cycles == 0) + if (notes->branch && al->cycles) { + if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else show_title = true; @@ -3120,18 +3220,18 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } } - if (notes->have_cycles) { - if (al->ipc) - obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); + if (notes->branch) { + if (al->cycles && al->cycles->ipc) + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (!notes->options->show_minmax_cycle) { - if (al->cycles) + if (!annotate_opts.show_minmax_cycle) { + if (al->cycles && al->cycles->avg) obj__printf(obj, "%*" PRIu64 " ", - ANNOTATION__CYCLES_WIDTH - 1, al->cycles); + ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); @@ -3145,8 +3245,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati scnprintf(str, sizeof(str), "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", - al->cycles, al->cycles_min, - al->cycles_max); + al->cycles->avg, al->cycles->min, + al->cycles->max); obj__printf(obj, "%*s ", ANNOTATION__MINMAX_CYCLES_WIDTH - 1, @@ -3172,7 +3272,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!*al->line) obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); else if (al->offset == -1) { - if (al->line_nr && notes->options->show_linenr) + if (al->line_nr && annotate_opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); @@ -3182,15 +3282,15 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati u64 addr = al->offset; int color = -1; - if (!notes->options->use_offset) + if (!annotate_opts.use_offset) addr += notes->start; - if (!notes->options->use_offset) { + if (!annotate_opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { if (al->jump_sources && - notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { - if (notes->options->show_nr_jumps) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { + if (annotate_opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", notes->widths.jumps, @@ -3204,9 +3304,9 @@ print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); } else if (ins__is_call(&disasm_line(al)->ins) && - notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) { goto print_addr; - } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + } else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", @@ -3228,43 +3328,44 @@ print_addr: } void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *wops, - struct annotation_options *opts) + struct annotation_write_ops *wops) { __annotation_line__write(al, notes, wops->first_line, wops->current_entry, wops->change_color, wops->width, wops->obj, - opts->percent_type, + annotate_opts.percent_type, wops->set_color, wops->set_percent_color, wops->set_jumps_percent_color, wops->printf, wops->write_graph); } int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) + struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); size_t size = symbol__size(sym); int nr_pcnt = 1, err; - notes->offsets = zalloc(size * sizeof(struct annotation_line *)); - if (notes->offsets == NULL) + notes->src->offsets = zalloc(size * sizeof(struct annotation_line *)); + if (notes->src->offsets == NULL) return ENOMEM; if (evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; - err = symbol__annotate(ms, evsel, options, parch); + err = symbol__annotate(ms, evsel, parch); if (err) goto out_free_offsets; - notes->options = options; - symbol__calc_percent(sym, evsel); annotation__set_offsets(notes, size); annotation__mark_jump_targets(notes, sym); - annotation__compute_ipc(notes, size); + + err = annotation__compute_ipc(notes, size); + if (err) + goto out_free_offsets; + annotation__init_column_widths(notes, sym); notes->nr_events = nr_pcnt; @@ -3274,7 +3375,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; out_free_offsets: - zfree(¬es->offsets); + zfree(¬es->src->offsets); return err; } @@ -3337,8 +3438,10 @@ static int annotation__config(const char *var, const char *value, void *data) return 0; } -void annotation_options__init(struct annotation_options *opt) +void annotation_options__init(void) { + struct annotation_options *opt = &annotate_opts; + memset(opt, 0, sizeof(*opt)); /* Default values. */ @@ -3349,16 +3452,15 @@ void annotation_options__init(struct annotation_options *opt) opt->percent_type = PERCENT_PERIOD_LOCAL; } - -void annotation_options__exit(struct annotation_options *opt) +void annotation_options__exit(void) { - zfree(&opt->disassembler_style); - zfree(&opt->objdump_path); + zfree(&annotate_opts.disassembler_style); + zfree(&annotate_opts.objdump_path); } -void annotation_config__init(struct annotation_options *opt) +void annotation_config__init(void) { - perf_config(annotation__config, opt); + perf_config(annotation__config, &annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) @@ -3382,10 +3484,9 @@ static unsigned int parse_percent_type(char *str1, char *str2) return type; } -int annotate_parse_percent_type(const struct option *opt, const char *_str, +int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str, int unset __maybe_unused) { - struct annotation_options *opts = opt->value; unsigned int type; char *str1, *str2; int err = -1; @@ -3404,7 +3505,7 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str, if (type == (unsigned int) -1) type = parse_percent_type(str2, str1); if (type != (unsigned int) -1) { - opts->percent_type = type; + annotate_opts.percent_type = type; err = 0; } @@ -3413,11 +3514,267 @@ out: return err; } -int annotate_check_args(struct annotation_options *args) +int annotate_check_args(void) { + struct annotation_options *args = &annotate_opts; + if (args->prefix_strip && !args->prefix) { pr_err("--prefix-strip requires --prefix\n"); return -1; } return 0; } + +/* + * Get register number and access offset from the given instruction. + * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs + * to revisit the format when it handles different architecture. + * Fills @reg and @offset when return 0. + */ +static int extract_reg_offset(struct arch *arch, const char *str, + struct annotated_op_loc *op_loc) +{ + char *p; + char *regname; + + if (arch->objdump.register_char == 0) + return -1; + + /* + * It should start from offset, but it's possible to skip 0 + * in the asm. So 0(%rax) should be same as (%rax). + * + * However, it also start with a segment select register like + * %gs:0x18(%rbx). In that case it should skip the part. + */ + if (*str == arch->objdump.register_char) { + while (*str && !isdigit(*str) && + *str != arch->objdump.memory_ref_char) + str++; + } + + op_loc->offset = strtol(str, &p, 0); + + p = strchr(p, arch->objdump.register_char); + if (p == NULL) + return -1; + + regname = strdup(p); + if (regname == NULL) + return -1; + + op_loc->reg = get_dwarf_regnum(regname, 0); + free(regname); + return 0; +} + +/** + * annotate_get_insn_location - Get location of instruction + * @arch: the architecture info + * @dl: the target instruction + * @loc: a buffer to save the data + * + * Get detailed location info (register and offset) in the instruction. + * It needs both source and target operand and whether it accesses a + * memory location. The offset field is meaningful only when the + * corresponding mem flag is set. + * + * Some examples on x86: + * + * mov (%rax), %rcx # src_reg = rax, src_mem = 1, src_offset = 0 + * # dst_reg = rcx, dst_mem = 0 + * + * mov 0x18, %r8 # src_reg = -1, dst_reg = r8 + */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc) +{ + struct ins_operands *ops; + struct annotated_op_loc *op_loc; + int i; + + if (!strcmp(dl->ins.name, "lock")) + ops = dl->ops.locked.ops; + else + ops = &dl->ops; + + if (ops == NULL) + return -1; + + memset(loc, 0, sizeof(*loc)); + + for_each_insn_op_loc(loc, i, op_loc) { + const char *insn_str = ops->source.raw; + + if (i == INSN_OP_TARGET) + insn_str = ops->target.raw; + + /* Invalidate the register by default */ + op_loc->reg = -1; + + if (insn_str == NULL) + continue; + + if (strchr(insn_str, arch->objdump.memory_ref_char)) { + op_loc->mem_ref = true; + extract_reg_offset(arch, insn_str, op_loc); + } else { + char *s = strdup(insn_str); + + if (s) { + op_loc->reg = get_dwarf_regnum(s, 0); + free(s); + } + } + } + + return 0; +} + +static void symbol__ensure_annotate(struct map_symbol *ms, struct evsel *evsel) +{ + struct disasm_line *dl, *tmp_dl; + struct annotation *notes; + + notes = symbol__annotation(ms->sym); + if (!list_empty(¬es->src->source)) + return; + + if (symbol__annotate(ms, evsel, NULL) < 0) + return; + + /* remove non-insn disasm lines for simplicity */ + list_for_each_entry_safe(dl, tmp_dl, ¬es->src->source, al.node) { + if (dl->al.offset == -1) { + list_del(&dl->al.node); + free(dl); + } + } +} + +static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip) +{ + struct disasm_line *dl; + struct annotation *notes; + + notes = symbol__annotation(sym); + + list_for_each_entry(dl, ¬es->src->source, al.node) { + if (sym->start + dl->al.offset == ip) + return dl; + } + return NULL; +} + +static struct annotated_item_stat *annotate_data_stat(struct list_head *head, + const char *name) +{ + struct annotated_item_stat *istat; + + list_for_each_entry(istat, head, list) { + if (!strcmp(istat->name, name)) + return istat; + } + + istat = zalloc(sizeof(*istat)); + if (istat == NULL) + return NULL; + + istat->name = strdup(name); + if (istat->name == NULL) { + free(istat); + return NULL; + } + + list_add_tail(&istat->list, head); + return istat; +} + +/** + * hist_entry__get_data_type - find data type for given hist entry + * @he: hist entry + * + * This function first annotates the instruction at @he->ip and extracts + * register and offset info from it. Then it searches the DWARF debug + * info to get a variable and type information using the address, register, + * and offset. + */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) +{ + struct map_symbol *ms = &he->ms; + struct evsel *evsel = hists_to_evsel(he->hists); + struct arch *arch; + struct disasm_line *dl; + struct annotated_insn_loc loc; + struct annotated_op_loc *op_loc; + struct annotated_data_type *mem_type; + struct annotated_item_stat *istat; + u64 ip = he->ip; + int i; + + ann_data_stat.total++; + + if (ms->map == NULL || ms->sym == NULL) { + ann_data_stat.no_sym++; + return NULL; + } + + if (!symbol_conf.init_annotation) { + ann_data_stat.no_sym++; + return NULL; + } + + if (evsel__get_arch(evsel, &arch) < 0) { + ann_data_stat.no_insn++; + return NULL; + } + + /* Make sure it runs objdump to get disasm of the function */ + symbol__ensure_annotate(ms, evsel); + + /* + * Get a disasm to extract the location from the insn. + * This is too slow... + */ + dl = find_disasm_line(ms->sym, ip); + if (dl == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + + if (annotate_get_insn_location(arch, dl, &loc) < 0) { + ann_data_stat.no_insn_ops++; + istat->bad++; + return NULL; + } + + for_each_insn_op_loc(&loc, i, op_loc) { + if (!op_loc->mem_ref) + continue; + + mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); + if (mem_type) + istat->good++; + else + istat->bad++; + + if (symbol_conf.annotate_data_sample) { + annotated_data_type__update_samples(mem_type, evsel, + op_loc->offset, + he->stat.nr_events, + he->stat.period); + } + he->mem_type_off = op_loc->offset; + return mem_type; + } + + ann_data_stat.no_mem_ops++; + istat->bad++; + return NULL; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 962780559176..dba50762c6e8 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -23,6 +23,7 @@ struct option; struct perf_sample; struct evsel; struct symbol; +struct annotated_data_type; struct ins { const char *name; @@ -31,8 +32,6 @@ struct ins { struct ins_operands { char *raw; - char *raw_comment; - char *raw_func_start; struct { char *raw; char *name; @@ -41,22 +40,30 @@ struct ins_operands { s64 offset; bool offset_avail; bool outside; + bool multi_regs; } target; union { struct { char *raw; char *name; u64 addr; + bool multi_regs; } source; struct { struct ins ins; struct ins_operands *ops; } locked; + struct { + char *raw_comment; + char *raw_func_start; + } jump; }; }; struct arch; +bool arch__is(struct arch *arch, const char *name); + struct ins_ops { void (*free)(struct ins_operands *ops); int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms); @@ -101,6 +108,8 @@ struct annotation_options { unsigned int percent_type; }; +extern struct annotation_options annotate_opts; + enum { ANNOTATION__OFFSET_JUMP_TARGETS = 1, ANNOTATION__OFFSET_CALL, @@ -130,6 +139,13 @@ struct annotation_data { struct sym_hist_entry he; }; +struct cycles_info { + float ipc; + u64 avg; + u64 max; + u64 min; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -137,12 +153,9 @@ struct annotation_line { char *line; int line_nr; char *fileloc; - int jump_sources; - float ipc; - u64 cycles; - u64 cycles_max; - u64 cycles_min; char *path; + struct cycles_info *cycles; + int jump_sources; u32 idx; int idx_asm; int data_nr; @@ -214,8 +227,7 @@ struct annotation_write_ops { }; void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops, - struct annotation_options *opts); + struct annotation_write_ops *ops); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, @@ -264,27 +276,29 @@ struct cyc_hist { * returns. */ struct annotated_source { - struct list_head source; - int nr_histograms; - size_t sizeof_sym_hist; - struct cyc_hist *cycles_hist; - struct sym_hist *histograms; + struct list_head source; + size_t sizeof_sym_hist; + struct sym_hist *histograms; + struct annotation_line **offsets; + int nr_histograms; + int nr_entries; + int nr_asm_entries; + u16 max_line_len; }; -struct LOCKABLE annotation { - u64 max_coverage; - u64 start; +struct annotated_branch { u64 hit_cycles; u64 hit_insn; unsigned int total_insn; unsigned int cover_insn; - struct annotation_options *options; - struct annotation_line **offsets; + struct cyc_hist *cycles_hist; + u64 max_coverage; +}; + +struct LOCKABLE annotation { + u64 start; int nr_events; int max_jump_sources; - int nr_entries; - int nr_asm_entries; - u16 max_line_len; struct { u8 addr; u8 jumps; @@ -293,8 +307,8 @@ struct LOCKABLE annotation { u8 max_addr; u8 max_ins_name; } widths; - bool have_cycles; struct annotated_source *src; + struct annotated_branch *branch; }; static inline void annotation__init(struct annotation *notes __maybe_unused) @@ -308,10 +322,10 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->have_cycles && notes->options->show_minmax_cycle) + if (notes->branch && annotate_opts.show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } static inline int annotation__pcnt_width(struct annotation *notes) @@ -319,13 +333,12 @@ static inline int annotation__pcnt_width(struct annotation *notes) return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; } -static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) +static inline bool annotation_line__filter(struct annotation_line *al) { - return notes->options->hide_src_code && al->offset == -1; + return annotate_opts.hide_src_code && al->offset == -1; } void annotation__set_offsets(struct annotation *notes, s64 size); -void annotation__compute_ipc(struct annotation *notes, size_t size); void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); @@ -349,6 +362,8 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, struct evsel *evsel); +struct annotated_branch *annotation__get_branch(struct annotation *notes); + int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); @@ -361,11 +376,9 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); enum symbol_disassemble_errno { @@ -392,43 +405,86 @@ enum symbol_disassemble_errno { int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options); +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void annotated_source__purge(struct annotated_source *as); -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts); +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts); + struct hist_browser_timer *hbt); #else static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } #endif -void annotation_options__init(struct annotation_options *opt); -void annotation_options__exit(struct annotation_options *opt); +void annotation_options__init(void); +void annotation_options__exit(void); -void annotation_config__init(struct annotation_options *opt); +void annotation_config__init(void); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); -int annotate_check_args(struct annotation_options *args); +int annotate_check_args(void); + +/** + * struct annotated_op_loc - Location info of instruction operand + * @reg: Register in the operand + * @offset: Memory access offset in the operand + * @mem_ref: Whether the operand accesses memory + */ +struct annotated_op_loc { + int reg; + int offset; + bool mem_ref; +}; + +enum annotated_insn_ops { + INSN_OP_SOURCE = 0, + INSN_OP_TARGET = 1, + + INSN_OP_MAX, +}; + +/** + * struct annotated_insn_loc - Location info of instruction + * @ops: Array of location info for source and target operands + */ +struct annotated_insn_loc { + struct annotated_op_loc ops[INSN_OP_MAX]; +}; + +#define for_each_insn_op_loc(insn_loc, i, op_loc) \ + for (i = INSN_OP_SOURCE, op_loc = &(insn_loc)->ops[i]; \ + i < INSN_OP_MAX; \ + i++, op_loc++) + +/* Get detailed location info in the instruction */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc); + +/* Returns a data type from the sample instruction (if any) */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he); + +struct annotated_item_stat { + struct list_head list; + char *name; + int good; + int bad; +}; +extern struct list_head ann_insn_stat; #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a0368202a746..3684e6009b63 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); mp->mmap_needed = evsel->needs_auxtrace_mmap; @@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); if (per_cpu_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); @@ -1638,6 +1638,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'Z': synth_opts->timeless_decoding = true; break; + case 'T': + synth_opts->use_timestamp = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 29eb82dff574..55702215a82d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -99,6 +99,7 @@ enum itrace_period_type { * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps + * @use_timestamp: use the timestamp trace as kernel time * @vm_time_correlation: perform VM Time Correlation * @vm_tm_corr_dry_run: VM Time Correlation dry-run * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments @@ -146,6 +147,7 @@ struct itrace_synth_opts { bool remote_access; bool mem; bool timeless_decoding; + bool use_timestamp; bool vm_time_correlation; bool vm_tm_corr_dry_run; char *vm_tm_corr_args; @@ -678,6 +680,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " q: quicker (less detailed) decoding\n" \ " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ +" T: use the timestamp trace as kernel time\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is iybxwpe or cewp\n" diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 591fc1edd385..dec910989701 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -129,9 +129,9 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, al.sym = he->ms.sym; notes = symbol__annotation(he->ms.sym); - if (!notes || !notes->src || !notes->src->cycles_hist) + if (!notes || !notes->branch || !notes->branch->cycles_hist) return 0; - ch = notes->src->cycles_hist; + ch = notes->branch->cycles_hist; for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { if (ch[i].num_aggr) { struct block_info *bi; @@ -464,8 +464,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps) } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts) + struct evsel *evsel, struct perf_env *env) { int ret; @@ -477,8 +476,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent, - env, annotation_opts); + ret = block_hists_tui_browse(bh, evsel, min_percent, env); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index 42e9dcc4cf0a..96f53e89795e 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -78,8 +78,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts); + struct evsel *evsel, struct perf_env *env); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index 680e92774d0c..15c42196c24c 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -311,6 +311,7 @@ done: double block_range__coverage(struct block_range *br) { struct symbol *sym; + struct annotated_branch *branch; if (!br) { if (block_ranges.blocks) @@ -323,5 +324,9 @@ double block_range__coverage(struct block_range *br) if (!sym) return -1; - return (double)br->coverage / symbol__annotation(sym)->max_coverage; + branch = symbol__annotation(sym)->branch; + if (!branch) + return -1; + + return (double)br->coverage / branch->max_coverage; } diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 38fcf3ba5749..3573e0b7ef3e 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -386,6 +386,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, int err; int fd; + if (opts->no_bpf_event) + return 0; + event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; @@ -542,9 +545,9 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp) +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) { __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); @@ -560,7 +563,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, if (info->btf_id) { struct btf_node *node; - node = perf_env__find_btf(env, info->btf_id); + node = __perf_env__find_btf(env, info->btf_id); if (node) btf = btf__new((__u8 *)(node->data), node->data_size); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 1bcbd4fb6c66..e2f0420905f5 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -33,9 +33,9 @@ struct btf_node { int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp); +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -50,9 +50,9 @@ static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, return 0; } -static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, - struct perf_env *env __maybe_unused, - FILE *fp __maybe_unused) +static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, + struct perf_env *env __maybe_unused, + FILE *fp __maybe_unused) { } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7f9b0e46e008..7a8af60e0f51 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) return -1; if (!all_cpu_map) { - all_cpu_map = perf_cpu_map__new(NULL); + all_cpu_map = perf_cpu_map__new_online_cpus(); if (!all_cpu_map) return -1; } diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index f1716c089c99..31ff19afc20c 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -318,7 +318,7 @@ int lock_contention_read(struct lock_contention *con) } /* make sure it loads the kernel map */ - map__load(maps__first(machine->kmaps)->map); + maps__load_first(machine->kmaps); prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 0cd3369af2a4..b29109cd3609 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -3,6 +3,8 @@ #define PERF_COMPRESS_H #include <stdbool.h> +#include <stddef.h> +#include <sys/types.h> #ifdef HAVE_ZSTD_SUPPORT #include <zstd.h> #endif @@ -21,6 +23,7 @@ struct zstd_data { #ifdef HAVE_ZSTD_SUPPORT ZSTD_CStream *cstream; ZSTD_DStream *dstream; + int comp_level; #endif }; @@ -29,7 +32,7 @@ struct zstd_data { int zstd_init(struct zstd_data *data, int level); int zstd_fini(struct zstd_data *data); -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)); @@ -48,7 +51,7 @@ static inline int zstd_fini(struct zstd_data *data __maybe_unused) } static inline -size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, void *dst __maybe_unused, size_t dst_size __maybe_unused, void *src __maybe_unused, size_t src_size __maybe_unused, size_t max_record_size __maybe_unused, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0e090e8bc334..0581ee0fa5f2 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ static struct perf_cpu_map *online; if (!online) - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ return online; } diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 81cfc85f4668..8bbeb2dc76fd 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void) ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); return NULL; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a9873d14c632..d65d7485886c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -3346,12 +3346,27 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; - /* Use virtual timestamps if all ETMs report ts_source = 1 */ - etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); + if (etm->synth_opts.use_timestamp) + /* + * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, + * therefore the decoder cannot know if the timestamp trace is + * same with the kernel time. + * + * If a user has knowledge for the working platform and can + * specify itrace option 'T' to tell decoder to forcely use the + * traced timestamp as the kernel time. + */ + etm->has_virtual_ts = true; + else + /* Use virtual timestamps if all ETMs report ts_source = 1 */ + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); if (!etm->has_virtual_ts) ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" - "The time field of the samples will not be set accurately.\n\n"); + "The time field of the samples will not be set accurately.\n" + "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" + "you can specify the itrace option 'T' for timestamp decoding\n" + "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); etm->auxtrace.process_event = cs_etm__process_event; etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b9fb71ab7a73..106429155c2e 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -253,8 +253,8 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, */ addr_location__init(&al); al.sym = node->ms.sym; - al.map = node->ms.map; - al.maps = thread__maps(thread); + al.map = map__get(node->ms.map); + al.maps = maps__get(thread__maps(thread)); al.addr = node->ip; if (al.map && !al.sym) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 88378c4c5dd9..e282b4ceb4d2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -38,12 +38,21 @@ bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static FILE *debug_file; +static FILE *_debug_file; bool debug_display_time; +FILE *debug_file(void) +{ + if (!_debug_file) { + pr_warning_once("debug_file not set"); + debug_set_file(stderr); + } + return _debug_file; +} + void debug_set_file(FILE *file) { - debug_file = file; + _debug_file = file; } void debug_set_display_time(bool set) @@ -78,8 +87,8 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); } else { - ret = fprintf_time(debug_file); - ret += vfprintf(debug_file, fmt, args); + ret = fprintf_time(debug_file()); + ret += vfprintf(debug_file(), fmt, args); } } @@ -107,9 +116,8 @@ static int veprintf_time(u64 t, const char *fmt, va_list args) nsecs -= secs * NSEC_PER_SEC; usecs = nsecs / NSEC_PER_USEC; - ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", - secs, usecs); - ret += vfprintf(stderr, fmt, args); + ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); + ret += vfprintf(debug_file(), fmt, args); return ret; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f99468a7f681..de8870980d44 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -77,6 +77,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +FILE *debug_file(void); void debug_set_file(FILE *file); void debug_set_display_time(bool set); void perf_debug_setup(void); diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c new file mode 100644 index 000000000000..19acf4775d35 --- /dev/null +++ b/tools/perf/util/debuginfo.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DWARF debug information handling code. Copied from probe-finder.c. + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/zalloc.h> + +#include "build-id.h" +#include "dso.h" +#include "debug.h" +#include "debuginfo.h" +#include "symbol.h" + +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include <elfutils/debuginfod.h> +#endif + +/* Dwarf FL wrappers */ +static char *debuginfo_path; /* Currently dummy */ + +static const Dwfl_Callbacks offline_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + + .section_address = dwfl_offline_section_address, + + /* We use this table for core files too. */ + .find_elf = dwfl_build_id_find_elf, +}; + +/* Get a Dwarf from offline image */ +static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, + const char *path) +{ + GElf_Addr dummy; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + dbg->dwfl = dwfl_begin(&offline_callbacks); + if (!dbg->dwfl) + goto error; + + dwfl_report_begin(dbg->dwfl); + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); + if (!dbg->mod) + goto error; + + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); + if (!dbg->dbg) + goto error; + + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + + dwfl_report_end(dbg->dwfl, NULL, NULL); + + return 0; +error: + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + else + close(fd); + memset(dbg, 0, sizeof(*dbg)); + + return -ENOENT; +} + +static struct debuginfo *__debuginfo__new(const char *path) +{ + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + if (!dbg) + return NULL; + + if (debuginfo__init_offline_dwarf(dbg, path) < 0) + zfree(&dbg); + if (dbg) + pr_debug("Open Debuginfo file: %s\n", path); + return dbg; +} + +enum dso_binary_type distro_dwarf_types[] = { + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +struct debuginfo *debuginfo__new(const char *path) +{ + enum dso_binary_type *type; + char buf[PATH_MAX], nil = '\0'; + struct dso *dso; + struct debuginfo *dinfo = NULL; + struct build_id bid; + + /* Try to open distro debuginfo files */ + dso = dso__new(path); + if (!dso) + goto out; + + /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ + if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + dso__set_build_id(dso, &bid); + + for (type = distro_dwarf_types; + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; + type++) { + if (dso__read_binary_type_filename(dso, *type, &nil, + buf, PATH_MAX) < 0) + continue; + dinfo = __debuginfo__new(buf); + } + dso__put(dso); + +out: + /* if failed to open all distro debuginfo, open given binary */ + return dinfo ? : __debuginfo__new(path); +} + +void debuginfo__delete(struct debuginfo *dbg) +{ + if (dbg) { + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + free(dbg); + } +} + +/* For the kernel module, we need a special code to get a DIE */ +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + if (adjust_offset) + *offs -= shdr->sh_offset; + } + } + return 0; +} + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h new file mode 100644 index 000000000000..4d65b8c605fc --- /dev/null +++ b/tools/perf/util/debuginfo.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PERF_DEBUGINFO_H +#define _PERF_DEBUGINFO_H + +#include <errno.h> +#include <linux/compiler.h> + +#ifdef HAVE_DWARF_SUPPORT + +#include "dwarf-aux.h" + +/* debug information structure */ +struct debuginfo { + Dwarf *dbg; + Dwfl_Module *mod; + Dwfl *dwfl; + Dwarf_Addr bias; + const unsigned char *build_id; +}; + +/* This also tries to open distro debuginfo */ +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); + +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset); + +#else /* HAVE_DWARF_SUPPORT */ + +/* dummy debug information structure */ +struct debuginfo { +}; + +static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused) +{ + return NULL; +} + +static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) +{ +} + +static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, + Dwarf_Addr *offs __maybe_unused, + bool adjust_offset __maybe_unused) +{ + return -EINVAL; +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, + char **new_path); +#else /* HAVE_DEBUGINFOD_SUPPORT */ +static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ + +#endif /* _PERF_DEBUGINFO_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 1f629b6fb7cf..22fd5fa806ed 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -31,6 +31,7 @@ #include "debug.h" #include "string2.h" #include "vdso.h" +#include "annotate-data.h" static const char * const debuglink_paths[] = { "%.0s%s", @@ -1327,6 +1328,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT_CACHED; dso->srclines = RB_ROOT_CACHED; + dso->data_types = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1370,6 +1372,8 @@ void dso__delete(struct dso *dso) symbols__delete(&dso->symbols); dso->symbol_names_len = 0; zfree(&dso->symbol_names); + annotated_data_type__tree_delete(&dso->data_types); + if (dso->short_name_allocated) { zfree((char **)&dso->short_name); dso->short_name_allocated = false; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3759de8c2267..ce9f3849a773 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -154,6 +154,8 @@ struct dso { size_t symbol_names_len; struct rb_root_cached inlined_nodes; struct rb_root_cached srclines; + struct rb_root data_types; + struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 2941d88f2199..7aa5fee0da19 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1051,32 +1051,28 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, } /** - * die_get_typename - Get the name of given variable DIE - * @vr_die: a variable DIE + * die_get_typename_from_type - Get the name of given type DIE + * @type_die: a type DIE * @buf: a strbuf for result type name * - * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * Get the name of @type_die and stores it to @buf. Return 0 if succeeded. * and Return -ENOENT if failed to find type name. * Note that the result will stores typedef name if possible, and stores * "*(function_type)" if the type is a function pointer. */ -int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf) { - Dwarf_Die type; int tag, ret; const char *tmp = ""; - if (__die_get_real_type(vr_die, &type) == NULL) - return -ENOENT; - - tag = dwarf_tag(&type); + tag = dwarf_tag(type_die); if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ return strbuf_add(buf, "(function_type)", 15); } else { - const char *name = dwarf_diename(&type); + const char *name = dwarf_diename(type_die); if (tag == DW_TAG_union_type) tmp = "union "; @@ -1089,8 +1085,35 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) /* Write a base name */ return strbuf_addf(buf, "%s%s", tmp, name ?: ""); } - ret = die_get_typename(&type, buf); - return ret ? ret : strbuf_addstr(buf, tmp); + ret = die_get_typename(type_die, buf); + if (ret < 0) { + /* void pointer has no type attribute */ + if (tag == DW_TAG_pointer_type && ret == -ENOENT) + return strbuf_addf(buf, "void*"); + + return ret; + } + return strbuf_addstr(buf, tmp); +} + +/** + * die_get_typename - Get the name of given variable DIE + * @vr_die: a variable DIE + * @buf: a strbuf for result type name + * + * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * and Return -ENOENT if failed to find type name. + * Note that the result will stores typedef name if possible, and stores + * "*(function_type)" if the type is a function pointer. + */ +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +{ + Dwarf_Die type; + + if (__die_get_real_type(vr_die, &type) == NULL) + return -ENOENT; + + return die_get_typename_from_type(&type, buf); } /** @@ -1238,12 +1261,151 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) out: return ret; } -#else -int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) + +/* Interval parameters for __die_find_var_reg_cb() */ +struct find_var_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Target memory address (for global data) */ + Dwarf_Addr addr; + /* Target register */ + unsigned reg; + /* Access offset, set for global data */ + int offset; +}; + +/* Max number of registers DW_OP_regN supports */ +#define DWARF_OP_DIRECT_REGS 32 + +/* Only checks direct child DIEs in the given scope. */ +static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + /* Assuming the location list is sorted by address */ + if (end < data->pc) + continue; + if (start > data->pc) + break; + + /* Only match with a simple case */ + if (data->reg < DWARF_OP_DIRECT_REGS) { + if (ops->atom == (DW_OP_reg0 + data->reg) && nops == 1) + return DIE_FIND_CB_END; + } else { + if (ops->atom == DW_OP_regx && ops->number == data->reg && + nops == 1) + return DIE_FIND_CB_END; + } + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_reg - Find a variable saved in a register + * @sc_die: a scope DIE + * @pc: the program address to find + * @reg: the register number to find + * @die_mem: a buffer to save the resulting DIE + * + * Find the variable DIE accessed by the given register. + */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem) +{ + struct find_var_data data = { + .pc = pc, + .reg = reg, + }; + return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); +} + +/* Only checks direct child DIEs in the given scope */ +static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Word size; + Dwarf_Die type_die; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + if (ops->atom != DW_OP_addr) + continue; + + if (data->addr < ops->number) + continue; + + if (data->addr == ops->number) { + /* Update offset relative to the start of the variable */ + data->offset = 0; + return DIE_FIND_CB_END; + } + + if (die_get_real_type(die_mem, &type_die) == NULL) + continue; + + if (dwarf_aggregate_size(&type_die, &size) < 0) + continue; + + if (data->addr >= ops->number + size) + continue; + + /* Update offset relative to the start of the variable */ + data->offset = data->addr - ops->number; + return DIE_FIND_CB_END; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_addr - Find variable located at given address + * @sc_die: a scope DIE + * @pc: the program address to find + * @addr: the data address to find + * @die_mem: a buffer to save the resulting DIE + * @offset: the offset in the resulting type + * + * Find the variable DIE located at the given address (in PC-relative mode). + * This is usually for global variables. + */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset) { - return -ENOTSUP; + struct find_var_data data = { + .pc = pc, + .addr = addr, + }; + Dwarf_Die *result; + + result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem); + if (result) + *offset = data.offset; + return result; } #endif @@ -1425,3 +1587,56 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, *entrypc = postprologue_addr; } + +/* Internal parameters for __die_find_scope_cb() */ +struct find_scope_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Number of scopes found [output] */ + int nr; + /* Array of scopes found, 0 for the outermost one. [output] */ + Dwarf_Die *scopes; +}; + +static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_scope_data *data = arg; + + if (dwarf_haspc(die_mem, data->pc)) { + Dwarf_Die *tmp; + + tmp = realloc(data->scopes, (data->nr + 1) * sizeof(*tmp)); + if (tmp == NULL) + return DIE_FIND_CB_END; + + memcpy(tmp + data->nr, die_mem, sizeof(*die_mem)); + data->scopes = tmp; + data->nr++; + return DIE_FIND_CB_CHILD; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_get_scopes - Return a list of scopes including the address + * @cu_die: a compile unit DIE + * @pc: the address to find + * @scopes: the array of DIEs for scopes (result) + * + * This function does the same as the dwarf_getscopes() but doesn't follow + * the origins of inlined functions. It returns the number of scopes saved + * in the @scopes argument. The outer scope will be saved first (index 0) and + * the last one is the innermost scope at the @pc. + */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes) +{ + struct find_scope_data data = { + .pc = pc, + }; + Dwarf_Die die_mem; + + die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem); + + *scopes = data.scopes; + return data.nr; +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ec8bc1083bb..4e64caac6df8 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -116,12 +116,14 @@ Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, Dwarf_Die *die_mem); +/* Get the name of given type DIE */ +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf); + /* Get the name of given variable DIE */ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); /* Check if target program is compiled with optimization */ bool die_is_optimized_target(Dwarf_Die *cu_die); @@ -130,4 +132,49 @@ bool die_is_optimized_target(Dwarf_Die *cu_die); void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, Dwarf_Addr *entrypc); -#endif +/* Get the list of including scopes */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); + +#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT + +/* Get byte offset range of given variable DIE */ +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); + +/* Find a variable saved in the 'reg' at given address */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem); + +/* Find a (global) variable located in the 'addr' */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset); + +#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, + Dwarf_Die *vr_die __maybe_unused, + struct strbuf *buf __maybe_unused) +{ + return -ENOTSUP; +} + +static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + int reg __maybe_unused, + Dwarf_Die *die_mem __maybe_unused) +{ + return NULL; +} + +static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + Dwarf_Addr addr __maybe_unused, + Dwarf_Die *die_mem __maybe_unused, + int *offset __maybe_unused) +{ + return NULL; +} + +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +#endif /* _DWARF_AUX_H */ diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 69cfaa5953bf..5b7f86c0063f 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -5,9 +5,12 @@ * Written by: Masami Hiramatsu <mhiramat@kernel.org> */ +#include <stdlib.h> +#include <string.h> #include <debug.h> #include <dwarf-regs.h> #include <elf.h> +#include <errno.h> #include <linux/kernel.h> #ifndef EM_AARCH64 @@ -68,3 +71,34 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) } return NULL; } + +__weak int get_arch_regnum(const char *name __maybe_unused) +{ + return -ENOTSUP; +} + +/* Return DWARF register number from architecture register name */ +int get_dwarf_regnum(const char *name, unsigned int machine) +{ + char *regname = strdup(name); + int reg = -1; + char *p; + + if (regname == NULL) + return -EINVAL; + + /* For convenience, remove trailing characters */ + p = strpbrk(regname, " ,)"); + if (p) + *p = '\0'; + + switch (machine) { + case EM_NONE: /* Generic arch - use host arch */ + reg = get_arch_regnum(regname); + break; + default: + pr_err("ELF MACHINE %x is not supported.\n", machine); + } + free(regname); + return reg; +} diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 44140b7f596a..a459374d0a1a 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "debug.h" #include "env.h" #include "util/header.h" +#include "linux/compiler.h" #include <linux/ctype.h> #include <linux/zalloc.h> #include "cgroup.h" @@ -12,6 +13,7 @@ #include <string.h> #include "pmus.h" #include "strbuf.h" +#include "trace/beauty/beauty.h" struct perf_env perf_env; @@ -23,12 +25,18 @@ struct perf_env perf_env; void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { + down_write(&env->bpf_progs.lock); + __perf_env__insert_bpf_prog_info(env, info_node); + up_write(&env->bpf_progs.lock); +} + +void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +{ __u32 prog_id = info_node->info_linear->info.id; struct bpf_prog_info_node *node; struct rb_node *parent = NULL; struct rb_node **p; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.infos.rb_node; while (*p != NULL) { @@ -40,15 +48,13 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - goto out; + return; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; -out: - up_write(&env->bpf_progs.lock); } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, @@ -78,13 +84,21 @@ out: bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { + bool ret; + + down_write(&env->bpf_progs.lock); + ret = __perf_env__insert_btf(env, btf_node); + up_write(&env->bpf_progs.lock); + return ret; +} + +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; - bool ret = true; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; while (*p != NULL) { @@ -96,25 +110,31 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); - ret = false; - goto out; + return false; } } rb_link_node(&btf_node->rb_node, parent, p); rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); env->bpf_progs.btfs_cnt++; -out: - up_write(&env->bpf_progs.lock); - return ret; + return true; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) { + struct btf_node *res; + + down_read(&env->bpf_progs.lock); + res = __perf_env__find_btf(env, btf_id); + up_read(&env->bpf_progs.lock); + return res; +} + +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ struct btf_node *node = NULL; struct rb_node *n; - down_read(&env->bpf_progs.lock); n = env->bpf_progs.btfs.rb_node; while (n) { @@ -124,13 +144,9 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - goto out; + return node; } - node = NULL; - -out: - up_read(&env->bpf_progs.lock); - return node; + return NULL; } /* purge data in bpf_progs.infos tree */ @@ -453,6 +469,18 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) +{ +#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + if (env->arch_strerrno == NULL) + env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); + + return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function"; +#else + return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; +#endif +} + const char *perf_env__cpuid(struct perf_env *env) { int status; @@ -531,6 +559,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name) +{ + char *pmu_mapping = env->pmu_mappings, *colon; + + for (int i = 0; i < env->nr_pmu_mappings; ++i) { + if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':') + goto out_error; + + pmu_mapping = colon + 1; + if (strcmp(pmu_mapping, pmu_name) == 0) + return true; + + pmu_mapping += strlen(pmu_mapping) + 1; + } +out_error: + return false; +} + char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4566c51f2fd9..7c527e65c186 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -46,10 +46,17 @@ struct hybrid_node { struct pmu_caps { int nr_caps; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; + char **caps; char *pmu_name; }; +typedef const char *(arch_syscalls__strerrno_t)(int err); + +arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch); + struct perf_env { char *hostname; char *os_release; @@ -62,6 +69,8 @@ struct perf_env { unsigned long long total_mem; unsigned int msr_pmu_type; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; int kernel_is_64_bit; int nr_cmdline; @@ -130,6 +139,7 @@ struct perf_env { */ bool enabled; } clock; + arch_syscalls__strerrno_t *arch_strerrno; }; enum perf_compress_type { @@ -159,19 +169,26 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__arch_strerrno(struct perf_env *env, int err); const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); +void __perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap); + +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 923c0fb15122..68f45e9e63b6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,13 +617,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; } else { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e36da58522ef..95f25e9fb994 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) return -1; if (target__uses_dummy_map(target)) - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); else cpus = perf_cpu_map__new(target->cpu_list); @@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) goto out; @@ -2518,3 +2518,33 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis } perf_cpu_map__put(user_requested_cpus); } + +void evlist__uniquify_name(struct evlist *evlist) +{ + char *new_name, empty_attributes[2] = ":", *attributes; + struct evsel *pos; + + if (perf_pmus__num_core_pmus() == 1) + return; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_hybrid(pos)) + continue; + + if (strchr(pos->name, '/')) + continue; + + attributes = strchr(pos->name, ':'); + if (attributes) + *attributes = '\0'; + else + attributes = empty_attributes; + + if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { + free(pos->name); + pos->name = new_name; + } else { + *attributes = ':'; + } + } +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 98e7ddb2bd30..cb91dc9117a2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -442,5 +442,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); +void evlist__uniquify_name(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 72a5dfc38d38..6d7c9c58a9bc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, if (cpus == NULL) { if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } @@ -1832,6 +1832,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.branch_counters) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; if (perf_missing_features.weight_struct) { @@ -1885,7 +1887,12 @@ bool evsel__detect_missing_features(struct evsel *evsel) * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.read_lost && + if (!perf_missing_features.branch_counters && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + return true; + } else if (!perf_missing_features.read_lost && (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { perf_missing_features.read_lost = true; pr_debug2("switching off PERF_FORMAT_LOST support\n"); @@ -2318,6 +2325,22 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) return new_val; } +static inline bool evsel__has_branch_counters(const struct evsel *evsel) +{ + struct evsel *cur, *leader = evsel__leader(evsel); + + /* The branch counters feature only supports group */ + if (!leader || !evsel->evlist) + return false; + + evlist__for_each_entry(evsel->evlist, cur) { + if ((leader == evsel__leader(cur)) && + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) + return true; + } + return false; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2551,6 +2574,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; + + if (evsel__has_branch_counters(evsel)) { + OVERFLOW_CHECK_u64(array); + + data->branch_stack_cntr = (u64 *)array; + sz = data->branch_stack->nr * sizeof(u64); + + OVERFLOW_CHECK(array, sz, max_size); + array = (void *)array + sz; + } } if (type & PERF_SAMPLE_REGS_USER) { @@ -2820,7 +2853,8 @@ u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const #endif -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize) { int paranoid; @@ -2828,18 +2862,19 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) evsel->core.attr.type == PERF_TYPE_HARDWARE && evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) { /* - * If it's cycles then fall back to hrtimer based - * cpu-clock-tick sw counter, which is always available even if - * no PMU support. + * If it's cycles then fall back to hrtimer based cpu-clock sw + * counter, which is always available even if no PMU support. * * PPC returns ENXIO until 2.6.37 (behavior changed with commit * b0a873e). */ - scnprintf(msg, msgsize, "%s", -"The cycles event is not supported, trying to fall back to cpu-clock-ticks"); - evsel->core.attr.type = PERF_TYPE_SOFTWARE; - evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK; + evsel->core.attr.config = target__has_cpu(target) + ? PERF_COUNT_SW_CPU_CLOCK + : PERF_COUNT_SW_TASK_CLOCK; + scnprintf(msg, msgsize, + "The cycles event is not supported, trying to fall back to %s", + target__has_cpu(target) ? "cpu-clock" : "task-clock"); zfree(&evsel->name); return true; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d791316a1792..efbb6e848287 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -191,6 +191,7 @@ struct perf_missing_features { bool code_page_size; bool weight_struct; bool read_lost; + bool branch_counters; }; extern struct perf_missing_features perf_missing_features; @@ -459,7 +460,8 @@ static inline bool evsel__is_clock(const struct evsel *evsel) evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize); int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size); diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index fefc72066c4e..ac17a3cb59dc 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -293,9 +293,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, */ phdr = elf_newphdr(e, 1); phdr[0].p_type = PT_LOAD; - phdr[0].p_offset = 0; - phdr[0].p_vaddr = 0; - phdr[0].p_paddr = 0; + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; phdr[0].p_filesz = csize; phdr[0].p_memsz = csize; phdr[0].p_flags = PF_X | PF_R; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e86b9439ffee..3fe28edc3d01 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1444,7 +1444,9 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) nodes = new_nodes; size += 4; } - ret = memory_node__read(&nodes[cnt++], idx); + ret = memory_node__read(&nodes[cnt], idx); + if (!ret) + cnt += 1; } out: closedir(dir); @@ -1847,8 +1849,8 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - bpf_event__print_bpf_prog_info(&node->info_linear->info, - env, fp); + __bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); } up_read(&env->bpf_progs.lock); @@ -2145,6 +2147,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp) __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } + + if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && + perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); + + if (max_precise != NULL && atoi(max_precise) == 0) + fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); + } } static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) @@ -3178,7 +3188,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + __perf_env__insert_bpf_prog_info(env, info_node); } up_write(&env->bpf_progs.lock); @@ -3225,7 +3235,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - perf_env__insert_btf(env, node); + __perf_env__insert_btf(env, node); node = NULL; } @@ -3259,7 +3269,9 @@ static int process_compressed(struct feat_fd *ff, } static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, - char ***caps, unsigned int *max_branches) + char ***caps, unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) { char *name, *value, *ptr; u32 nr_pmu_caps, i; @@ -3294,6 +3306,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, if (!strcmp(name, "branches")) *max_branches = atoi(value); + if (!strcmp(name, "branch_counter_nr")) + *br_cntr_nr = atoi(value); + + if (!strcmp(name, "branch_counter_width")) + *br_cntr_width = atoi(value); + free(value); free(name); } @@ -3318,7 +3336,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff, { int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches); + &ff->ph->env.max_branches, + &ff->ph->env.br_cntr_nr, + &ff->ph->env.br_cntr_width); if (!ret && !ff->ph->env.cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); @@ -3347,7 +3367,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) for (i = 0; i < nr_pmu; i++) { ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, &pmu_caps[i].caps, - &pmu_caps[i].max_branches); + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); if (ret) goto err; @@ -4369,9 +4391,10 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) ret += fprintf(fp, "... "); map = cpu_map__new_data(&ev->cpus.cpus); - if (map) + if (map) { ret += cpu_map__fprintf(map, fp); - else + perf_cpu_map__put(map); + } else ret += fprintf(fp, "failed to get cpus\n"); break; default: diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 43bd1ca62d58..52d0ce302ca0 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -123,6 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, if (dump_trace) hisi_ptt_dump_event(ptt, data, size); + free(data); return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index afc9f1c7f4dc..4a0aea0c9e00 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -82,6 +82,9 @@ enum hist_column { HISTC_ADDR_TO, HISTC_ADDR, HISTC_SIMD, + HISTC_TYPE, + HISTC_TYPE_OFFSET, + HISTC_SYMBOL_OFFSET, HISTC_NR_COLS, /* Last entry */ }; @@ -457,7 +460,6 @@ struct hist_browser_timer { int refresh; }; -struct annotation_options; struct res_sample; enum rstype { @@ -473,16 +475,13 @@ struct block_hist; void attr_to_script(char *buf, struct perf_event_attr *attr); int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_options); + float min_pcnt, struct perf_env *env, bool warn_lost_event); int script_browse(const char *script_opt, struct evsel *evsel); @@ -492,8 +491,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts); + float min_percent, struct perf_env *env); #else static inline int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -501,23 +499,20 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + bool warn_lost_event __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } @@ -541,8 +536,7 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, float min_percent __maybe_unused, - struct perf_env *env __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 7d99a084e82d..01fb25a1150a 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -2,6 +2,9 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ +#define DWARF_REG_PC 0xd3af9c /* random number */ +#define DWARF_REG_FB 0xd3affb /* random number */ + #ifdef HAVE_DWARF_SUPPORT const char *get_arch_regstr(unsigned int n); /* @@ -10,6 +13,22 @@ const char *get_arch_regstr(unsigned int n); * machine: ELF machine signature (EM_*) */ const char *get_dwarf_regstr(unsigned int n, unsigned int machine); + +int get_arch_regnum(const char *name); +/* + * get_dwarf_regnum - Returns DWARF regnum from register name + * name: architecture register name + * machine: ELF machine signature (EM_*) + */ +int get_dwarf_regnum(const char *name, unsigned int machine); + +#else /* HAVE_DWARF_SUPPORT */ + +static inline int get_dwarf_regnum(const char *name __maybe_unused, + unsigned int machine __maybe_unused) +{ + return -1; +} #endif #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 90c750150b19..b397a769006f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -453,7 +453,7 @@ static struct thread *findnew_guest_code(struct machine *machine, * Guest code can be found in hypervisor process at the same address * so copy host maps. */ - err = maps__clone(thread, thread__maps(host_thread)); + err = maps__copy_from(thread__maps(thread), thread__maps(host_thread)); thread__put(host_thread); if (err) goto out_err; @@ -1285,33 +1285,46 @@ static u64 find_entry_trampoline(struct dso *dso) #define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 #define X86_64_ENTRY_TRAMPOLINE 0x6000 +struct machine__map_x86_64_entry_trampolines_args { + struct maps *kmaps; + bool found; +}; + +static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data) +{ + struct machine__map_x86_64_entry_trampolines_args *args = data; + struct map *dest_map; + struct kmap *kmap = __map__kmap(map); + + if (!kmap || !is_entry_trampoline(kmap->name)) + return 0; + + dest_map = maps__find(args->kmaps, map__pgoff(map)); + if (dest_map != map) + map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); + + args->found = true; + return 0; +} + /* Map x86_64 PTI entry trampolines */ int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct maps *kmaps = machine__kernel_maps(machine); + struct machine__map_x86_64_entry_trampolines_args args = { + .kmaps = machine__kernel_maps(machine), + .found = false, + }; int nr_cpus_avail, cpu; - bool found = false; - struct map_rb_node *rb_node; u64 pgoff; /* * In the vmlinux case, pgoff is a virtual address which must now be * mapped to a vmlinux offset. */ - maps__for_each_entry(kmaps, rb_node) { - struct map *dest_map, *map = rb_node->map; - struct kmap *kmap = __map__kmap(map); - - if (!kmap || !is_entry_trampoline(kmap->name)) - continue; + maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args); - dest_map = maps__find(kmaps, map__pgoff(map)); - if (dest_map != map) - map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); - found = true; - } - if (found || machine->trampolines_mapped) + if (args.found || machine->trampolines_mapped) return 0; pgoff = find_entry_trampoline(kernel); @@ -1359,8 +1372,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) if (machine->vmlinux_map == NULL) return -ENOMEM; - map__set_map_ip(machine->vmlinux_map, identity__map_ip); - map__set_unmap_ip(machine->vmlinux_map, identity__map_ip); + map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY); return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); } @@ -1750,12 +1762,11 @@ int machine__create_kernel_maps(struct machine *machine) if (end == ~0ULL) { /* update end address of the kernel map using adjacent module address */ - struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine), - machine__kernel_map(machine)); - struct map_rb_node *next = map_rb_node__next(rb_node); + struct map *next = maps__find_next_entry(machine__kernel_maps(machine), + machine__kernel_map(machine)); if (next) - machine__set_kernel_mmap(machine, start, map__start(next->map)); + machine__set_kernel_mmap(machine, start, map__start(next)); } out_put: @@ -2157,9 +2168,13 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) - thread__put(thread); - + if (thread != NULL) { + if (symbol_conf.keep_exited_threads) + thread__set_exited(thread, /*exited=*/true); + else + machine__remove_thread(machine, thread); + } + thread__put(thread); return 0; } @@ -3395,16 +3410,8 @@ int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) { struct maps *maps = machine__kernel_maps(machine); - struct map_rb_node *pos; - int err = 0; - maps__for_each_entry(maps, pos) { - err = fn(pos->map, priv); - if (err != 0) { - break; - } - } - return err; + return maps__for_each_map(maps, fn, priv); } bool machine__is_lock_function(struct machine *machine, u64 addr) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f64b83004421..54c67cb7ecef 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -109,8 +109,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) map__set_pgoff(map, pgoff); map__set_reloc(map, 0); map__set_dso(map, dso__get(dso)); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); map__set_erange_warned(map, false); refcount_set(map__refcnt(map), 1); } @@ -172,7 +171,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map__init(result, start, start + len, pgoff, dso); if (anon || no_dso) { - map->map_ip = map->unmap_ip = identity__map_ip; + map->mapping_type = MAPPING_TYPE__IDENTITY; /* * Set memory without DSO as loaded. All map__find_* @@ -630,18 +629,3 @@ struct maps *map__kmaps(struct map *map) } return kmap->kmaps; } - -u64 map__dso_map_ip(const struct map *map, u64 ip) -{ - return ip - map__start(map) + map__pgoff(map); -} - -u64 map__dso_unmap_ip(const struct map *map, u64 ip) -{ - return ip + map__start(map) - map__pgoff(map); -} - -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip) -{ - return ip; -} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 1b53d53adc86..49756716cb13 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -16,23 +16,25 @@ struct dso; struct maps; struct machine; +enum mapping_type { + /* map__map_ip/map__unmap_ip are given as offsets in the DSO. */ + MAPPING_TYPE__DSO, + /* map__map_ip/map__unmap_ip are just the given ip value. */ + MAPPING_TYPE__IDENTITY, +}; + DECLARE_RC_STRUCT(map) { u64 start; u64 end; - bool erange_warned:1; - bool priv:1; - u32 prot; u64 pgoff; u64 reloc; - - /* ip -> dso rip */ - u64 (*map_ip)(const struct map *, u64); - /* dso rip -> ip */ - u64 (*unmap_ip)(const struct map *, u64); - struct dso *dso; refcount_t refcnt; + u32 prot; u32 flags; + enum mapping_type mapping_type:8; + bool erange_warned; + bool priv; }; struct kmap; @@ -41,38 +43,11 @@ struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct maps *map__kmaps(struct map *map); -/* ip -> dso rip */ -u64 map__dso_map_ip(const struct map *map, u64 ip); -/* dso rip -> ip */ -u64 map__dso_unmap_ip(const struct map *map, u64 ip); -/* Returns ip */ -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip); - static inline struct dso *map__dso(const struct map *map) { return RC_CHK_ACCESS(map)->dso; } -static inline u64 map__map_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->map_ip(map, ip); -} - -static inline u64 map__unmap_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->unmap_ip(map, ip); -} - -static inline void *map__map_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->map_ip; -} - -static inline void* map__unmap_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->unmap_ip; -} - static inline u64 map__start(const struct map *map) { return RC_CHK_ACCESS(map)->start; @@ -123,6 +98,34 @@ static inline size_t map__size(const struct map *map) return map__end(map) - map__start(map); } +/* ip -> dso rip */ +static inline u64 map__dso_map_ip(const struct map *map, u64 ip) +{ + return ip - map__start(map) + map__pgoff(map); +} + +/* dso rip -> ip */ +static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip) +{ + return rip + map__start(map) - map__pgoff(map); +} + +static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_map_ip(map, ip_or_rip); + else + return ip_or_rip; +} + +static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_unmap_ip(map, ip_or_rip); + else + return ip_or_rip; +} + /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); @@ -294,13 +297,13 @@ static inline void map__set_dso(struct map *map, struct dso *dso) RC_CHK_ACCESS(map)->dso = dso; } -static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip)) +static inline void map__set_mapping_type(struct map *map, enum mapping_type type) { - RC_CHK_ACCESS(map)->map_ip = map_ip; + RC_CHK_ACCESS(map)->mapping_type = type; } -static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip)) +static inline enum mapping_type map__mapping_type(struct map *map) { - RC_CHK_ACCESS(map)->unmap_ip = unmap_ip; + return RC_CHK_ACCESS(map)->mapping_type; } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 233438c95b53..0334fc18d9c6 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -10,6 +10,68 @@ #include "ui/ui.h" #include "unwind.h" +struct map_rb_node { + struct rb_node rb_node; + struct map *map; +}; + +#define maps__for_each_entry(maps, map) \ + for (map = maps__first(maps); map; map = map_rb_node__next(map)) + +#define maps__for_each_entry_safe(maps, map, next) \ + for (map = maps__first(maps), next = map_rb_node__next(map); map; \ + map = next, next = map_rb_node__next(map)) + +static struct rb_root *maps__entries(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->entries; +} + +static struct rw_semaphore *maps__lock(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->lock; +} + +static struct map **maps__maps_by_name(struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->maps_by_name; +} + +static struct map_rb_node *maps__first(struct maps *maps) +{ + struct rb_node *first = rb_first(maps__entries(maps)); + + if (first) + return rb_entry(first, struct map_rb_node, rb_node); + return NULL; +} + +static struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +{ + struct rb_node *next; + + if (!node) + return NULL; + + next = rb_next(&node->rb_node); + + if (!next) + return NULL; + + return rb_entry(next, struct map_rb_node, rb_node); +} + +static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node; + + maps__for_each_entry(maps, rb_node) { + if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) + return rb_node; + } + return NULL; +} + static void maps__init(struct maps *maps, struct machine *machine) { refcount_set(maps__refcnt(maps), 1); @@ -196,6 +258,41 @@ void maps__put(struct maps *maps) RC_CHK_PUT(maps); } +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos; + int ret = 0; + + down_read(maps__lock(maps)); + maps__for_each_entry(maps, pos) { + ret = cb(pos->map, data); + if (ret) + break; + } + up_read(maps__lock(maps)); + return ret; +} + +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos, *next; + unsigned int start_nr_maps; + + down_write(maps__lock(maps)); + + start_nr_maps = maps__nr_maps(maps); + maps__for_each_entry_safe(maps, pos, next) { + if (cb(pos->map, data)) { + __maps__remove(maps, pos); + --RC_CHK_ACCESS(maps)->nr_maps; + } + } + if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps)) + __maps__free_maps_by_name(maps); + + up_write(maps__lock(maps)); +} + struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) { struct map *map = maps__find(maps, addr); @@ -210,31 +307,40 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return NULL; } -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ +struct maps__find_symbol_by_name_args { + struct map **mapp; + const char *name; struct symbol *sym; - struct map_rb_node *pos; +}; - down_read(maps__lock(maps)); +static int maps__find_symbol_by_name_cb(struct map *map, void *data) +{ + struct maps__find_symbol_by_name_args *args = data; - maps__for_each_entry(maps, pos) { - sym = map__find_symbol_by_name(pos->map, name); + args->sym = map__find_symbol_by_name(map, args->name); + if (!args->sym) + return 0; - if (sym == NULL) - continue; - if (!map__contains_symbol(pos->map, sym)) { - sym = NULL; - continue; - } - if (mapp != NULL) - *mapp = pos->map; - goto out; + if (!map__contains_symbol(map, args->sym)) { + args->sym = NULL; + return 0; } - sym = NULL; -out: - up_read(maps__lock(maps)); - return sym; + if (args->mapp != NULL) + *args->mapp = map__get(map); + return 1; +} + +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) +{ + struct maps__find_symbol_by_name_args args = { + .mapp = mapp, + .name = name, + .sym = NULL, + }; + + maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); + return args.sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) @@ -253,41 +359,46 @@ int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) return ams->ms.sym ? 0 : -1; } -size_t maps__fprintf(struct maps *maps, FILE *fp) -{ - size_t printed = 0; - struct map_rb_node *pos; +struct maps__fprintf_args { + FILE *fp; + size_t printed; +}; - down_read(maps__lock(maps)); +static int maps__fprintf_cb(struct map *map, void *data) +{ + struct maps__fprintf_args *args = data; - maps__for_each_entry(maps, pos) { - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos->map, fp); - if (verbose > 2) { - printed += dso__fprintf(map__dso(pos->map), fp); - printed += fprintf(fp, "--\n"); - } + args->printed += fprintf(args->fp, "Map:"); + args->printed += map__fprintf(map, args->fp); + if (verbose > 2) { + args->printed += dso__fprintf(map__dso(map), args->fp); + args->printed += fprintf(args->fp, "--\n"); } + return 0; +} - up_read(maps__lock(maps)); +size_t maps__fprintf(struct maps *maps, FILE *fp) +{ + struct maps__fprintf_args args = { + .fp = fp, + .printed = 0, + }; + + maps__for_each_map(maps, maps__fprintf_cb, &args); - return printed; + return args.printed; } -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) +/* + * Find first map where end > map->start. + * Same as find_vma() in kernel. + */ +static struct rb_node *first_ending_after(struct maps *maps, const struct map *map) { struct rb_root *root; struct rb_node *next, *first; - int err = 0; - - down_write(maps__lock(maps)); root = maps__entries(maps); - - /* - * Find first map where end > map->start. - * Same as find_vma() in kernel. - */ next = root->rb_node; first = NULL; while (next) { @@ -301,8 +412,23 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) } else next = next->rb_right; } + return first; +} - next = first; +/* + * Adds new to maps, if new overlaps existing entries then the existing maps are + * adjusted or removed so that new fits without overlapping any entries. + */ +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) +{ + + struct rb_node *next; + int err = 0; + FILE *fp = debug_file(); + + down_write(maps__lock(maps)); + + next = first_ending_after(maps, new); while (next && !err) { struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); next = rb_next(&pos->rb_node); @@ -311,27 +437,27 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) * Stop if current map starts after map->end. * Maps are ordered by start: next will not overlap for sure. */ - if (map__start(pos->map) >= map__end(map)) + if (map__start(pos->map) >= map__end(new)) break; if (verbose >= 2) { if (use_browser) { pr_debug("overlapping maps in %s (disable tui for more info)\n", - map__dso(map)->name); + map__dso(new)->name); } else { - fputs("overlapping maps:\n", fp); - map__fprintf(map, fp); + pr_debug("overlapping maps:\n"); + map__fprintf(new, fp); map__fprintf(pos->map, fp); } } - rb_erase_init(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, maps__entries(maps)); /* * Now check if we need to create new maps for areas not * overlapped by the new map: */ - if (map__start(map) > map__start(pos->map)) { + if (map__start(new) > map__start(pos->map)) { struct map *before = map__clone(pos->map); if (before == NULL) { @@ -339,7 +465,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_end(before, map__start(map)); + map__set_end(before, map__start(new)); err = __maps__insert(maps, before); if (err) { map__put(before); @@ -351,7 +477,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) map__put(before); } - if (map__end(map) < map__end(pos->map)) { + if (map__end(new) < map__end(pos->map)) { struct map *after = map__clone(pos->map); if (after == NULL) { @@ -359,10 +485,10 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_start(after, map__end(map)); - map__add_pgoff(after, map__end(map) - map__start(pos->map)); - assert(map__map_ip(pos->map, map__end(map)) == - map__map_ip(after, map__end(map))); + map__set_start(after, map__end(new)); + map__add_pgoff(after, map__end(new) - map__start(pos->map)); + assert(map__map_ip(pos->map, map__end(new)) == + map__map_ip(after, map__end(new))); err = __maps__insert(maps, after); if (err) { map__put(after); @@ -376,16 +502,14 @@ put_map: map__put(pos->map); free(pos); } + /* Add the map. */ + err = __maps__insert(maps, new); up_write(maps__lock(maps)); return err; } -/* - * XXX This should not really _copy_ te maps, but refcount them. - */ -int maps__clone(struct thread *thread, struct maps *parent) +int maps__copy_from(struct maps *maps, struct maps *parent) { - struct maps *maps = thread__maps(thread); int err; struct map_rb_node *rb_node; @@ -416,17 +540,6 @@ out_unlock: return err; } -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) -{ - struct map_rb_node *rb_node; - - maps__for_each_entry(maps, rb_node) { - if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) - return rb_node; - } - return NULL; -} - struct map *maps__find(struct maps *maps, u64 ip) { struct rb_node *p; @@ -452,26 +565,275 @@ out: return m ? m->map : NULL; } -struct map_rb_node *maps__first(struct maps *maps) +static int map__strcmp(const void *a, const void *b) { - struct rb_node *first = rb_first(maps__entries(maps)); + const struct map *map_a = *(const struct map **)a; + const struct map *map_b = *(const struct map **)b; + const struct dso *dso_a = map__dso(map_a); + const struct dso *dso_b = map__dso(map_b); + int ret = strcmp(dso_a->short_name, dso_b->short_name); - if (first) - return rb_entry(first, struct map_rb_node, rb_node); - return NULL; + if (ret == 0 && map_a != map_b) { + /* + * Ensure distinct but name equal maps have an order in part to + * aid reference counting. + */ + ret = (int)map__start(map_a) - (int)map__start(map_b); + if (ret == 0) + ret = (int)((intptr_t)map_a - (intptr_t)map_b); + } + + return ret; } -struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +static int map__strcmp_name(const void *name, const void *b) { - struct rb_node *next; + const struct dso *dso = map__dso(*(const struct map **)b); - if (!node) - return NULL; + return strcmp(name, dso->short_name); +} - next = rb_next(&node->rb_node); +void __maps__sort_by_name(struct maps *maps) +{ + qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); +} - if (!next) +static int map__groups__sort_by_name_from_rbtree(struct maps *maps) +{ + struct map_rb_node *rb_node; + struct map **maps_by_name = realloc(maps__maps_by_name(maps), + maps__nr_maps(maps) * sizeof(struct map *)); + int i = 0; + + if (maps_by_name == NULL) + return -1; + + up_read(maps__lock(maps)); + down_write(maps__lock(maps)); + + RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; + RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); + + maps__for_each_entry(maps, rb_node) + maps_by_name[i++] = map__get(rb_node->map); + + __maps__sort_by_name(maps); + + up_write(maps__lock(maps)); + down_read(maps__lock(maps)); + + return 0; +} + +static struct map *__maps__find_by_name(struct maps *maps, const char *name) +{ + struct map **mapp; + + if (maps__maps_by_name(maps) == NULL && + map__groups__sort_by_name_from_rbtree(maps)) return NULL; - return rb_entry(next, struct map_rb_node, rb_node); + mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), + sizeof(*mapp), map__strcmp_name); + if (mapp) + return *mapp; + return NULL; +} + +struct map *maps__find_by_name(struct maps *maps, const char *name) +{ + struct map_rb_node *rb_node; + struct map *map; + + down_read(maps__lock(maps)); + + + if (RC_CHK_ACCESS(maps)->last_search_by_name) { + const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); + + if (strcmp(dso->short_name, name) == 0) { + map = RC_CHK_ACCESS(maps)->last_search_by_name; + goto out_unlock; + } + } + /* + * If we have maps->maps_by_name, then the name isn't in the rbtree, + * as maps->maps_by_name mirrors the rbtree when lookups by name are + * made. + */ + map = __maps__find_by_name(maps, name); + if (map || maps__maps_by_name(maps) != NULL) + goto out_unlock; + + /* Fallback to traversing the rbtree... */ + maps__for_each_entry(maps, rb_node) { + struct dso *dso; + + map = rb_node->map; + dso = map__dso(map); + if (strcmp(dso->short_name, name) == 0) { + RC_CHK_ACCESS(maps)->last_search_by_name = map; + goto out_unlock; + } + } + map = NULL; + +out_unlock: + up_read(maps__lock(maps)); + return map; +} + +struct map *maps__find_next_entry(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node = maps__find_node(maps, map); + struct map_rb_node *next = map_rb_node__next(rb_node); + + if (next) + return next->map; + + return NULL; +} + +void maps__fixup_end(struct maps *maps) +{ + struct map_rb_node *prev = NULL, *curr; + + down_write(maps__lock(maps)); + + maps__for_each_entry(maps, curr) { + if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map))) + map__set_end(prev->map, map__start(curr->map)); + + prev = curr; + } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + if (curr && !map__end(curr->map)) + map__set_end(curr->map, ~0ULL); + + up_write(maps__lock(maps)); +} + +/* + * Merges map into maps by splitting the new map within the existing map + * regions. + */ +int maps__merge_in(struct maps *kmaps, struct map *new_map) +{ + struct map_rb_node *rb_node; + struct rb_node *first; + bool overlaps; + LIST_HEAD(merged); + int err = 0; + + down_read(maps__lock(kmaps)); + first = first_ending_after(kmaps, new_map); + rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL; + overlaps = rb_node && map__start(rb_node->map) < map__end(new_map); + up_read(maps__lock(kmaps)); + + if (!overlaps) + return maps__insert(kmaps, new_map); + + maps__for_each_entry(kmaps, rb_node) { + struct map *old_map = rb_node->map; + + /* no overload with this one */ + if (map__end(new_map) < map__start(old_map) || + map__start(new_map) >= map__end(old_map)) + continue; + + if (map__start(new_map) < map__start(old_map)) { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new......| -> |new..| + * |old....| -> |old....| + */ + map__set_end(new_map, map__start(old_map)); + } else { + /* + * |new.............| -> |new..| |new..| + * |old....| -> |old....| + */ + struct map_list_node *m = map_list_node__new(); + + if (!m) { + err = -ENOMEM; + goto out; + } + + m->map = map__clone(new_map); + if (!m->map) { + free(m); + err = -ENOMEM; + goto out; + } + + map__set_end(m->map, map__start(old_map)); + list_add_tail(&m->node, &merged); + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } else { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new..| -> x + * |old.........| -> |old.........| + */ + map__put(new_map); + new_map = NULL; + break; + } else { + /* + * |new......| -> |new...| + * |old....| -> |old....| + */ + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } + } + +out: + while (!list_empty(&merged)) { + struct map_list_node *old_node; + + old_node = list_entry(merged.next, struct map_list_node, node); + list_del_init(&old_node->node); + if (!err) + err = maps__insert(kmaps, old_node->map); + map__put(old_node->map); + free(old_node); + } + + if (new_map) { + if (!err) + err = maps__insert(kmaps, new_map); + map__put(new_map); + } + return err; +} + +void maps__load_first(struct maps *maps) +{ + struct map_rb_node *first; + + down_read(maps__lock(maps)); + + first = maps__first(maps); + if (first) + map__load(first->map); + + up_read(maps__lock(maps)); } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 83144e0645ed..d836d04c9402 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -14,24 +14,18 @@ struct ref_reloc_sym; struct machine; struct map; struct maps; -struct thread; -struct map_rb_node { - struct rb_node rb_node; +struct map_list_node { + struct list_head node; struct map *map; }; -struct map_rb_node *maps__first(struct maps *maps); -struct map_rb_node *map_rb_node__next(struct map_rb_node *node); -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); - -#define maps__for_each_entry(maps, map) \ - for (map = maps__first(maps); map; map = map_rb_node__next(map)) +static inline struct map_list_node *map_list_node__new(void) +{ + return malloc(sizeof(struct map_list_node)); +} -#define maps__for_each_entry_safe(maps, map, next) \ - for (map = maps__first(maps), next = map_rb_node__next(map); map; \ - map = next, next = map_rb_node__next(map)) +struct map *maps__find(struct maps *maps, u64 addr); DECLARE_RC_STRUCT(maps) { struct rb_root entries; @@ -58,7 +52,7 @@ struct kmap { struct maps *maps__new(struct machine *machine); bool maps__empty(struct maps *maps); -int maps__clone(struct thread *thread, struct maps *parent); +int maps__copy_from(struct maps *maps, struct maps *parent); struct maps *maps__get(struct maps *maps); void maps__put(struct maps *maps); @@ -71,26 +65,16 @@ static inline void __maps__zput(struct maps **map) #define maps__zput(map) __maps__zput(&map) -static inline struct rb_root *maps__entries(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->entries; -} +/* Iterate over map calling cb for each entry. */ +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); +/* Iterate over map removing an entry if cb returns true. */ +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); static inline struct machine *maps__machine(struct maps *maps) { return RC_CHK_ACCESS(maps)->machine; } -static inline struct rw_semaphore *maps__lock(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->lock; -} - -static inline struct map **maps__maps_by_name(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->maps_by_name; -} - static inline unsigned int maps__nr_maps(const struct maps *maps) { return RC_CHK_ACCESS(maps)->nr_maps; @@ -125,12 +109,18 @@ struct addr_map_symbol; int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); struct map *maps__find_by_name(struct maps *maps, const char *name); +struct map *maps__find_next_entry(struct maps *maps, struct map *map); + int maps__merge_in(struct maps *kmaps, struct map *new_map); void __maps__sort_by_name(struct maps *maps); +void maps__fixup_end(struct maps *maps); + +void maps__load_first(struct maps *maps); + #endif // __PERF_MAPS_H diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 954b235e12e5..3a2e3687878c 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -100,11 +100,14 @@ int perf_mem_events__parse(const char *str) return -1; } -static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) +static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu, + struct perf_mem_event *e) { + char sysfs_name[100]; char path[PATH_MAX]; struct stat st; + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); return !stat(path, &st); } @@ -120,7 +123,6 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; /* @@ -136,12 +138,12 @@ int perf_mem_events__init(void) * of core PMU. */ while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); - e->supported |= perf_mem_event__supported(mnt, sysfs_name); + e->supported |= perf_mem_event__supported(mnt, pmu, e); + if (e->supported) { + found = true; + break; + } } - - if (e->supported) - found = true; } return found ? 0 : -ENOENT; @@ -167,13 +169,10 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int idx) { const char *mnt = sysfs__mount(); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, - pmu->name); - if (!perf_mem_event__supported(mnt, sysfs_name)) { + if (!perf_mem_event__supported(mnt, pmu, e)) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(idx, pmu->name)); } @@ -183,6 +182,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **rec_tmp, int *tmp_nr) { + const char *mnt = sysfs__mount(); int i = *argv_nr, k = 0; struct perf_mem_event *e; @@ -211,6 +211,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, while ((pmu = perf_pmus__scan(pmu)) != NULL) { const char *s = perf_mem_events__name(j, pmu->name); + if (!perf_mem_event__supported(mnt, pmu, e)) + continue; + rec_argv[i++] = "-e"; if (s) { char *copy = strdup(s); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 49093b21ee2d..122ee198a86e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -295,15 +295,14 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu map->core.flush = mp->flush; - map->comp_level = mp->comp_level; #ifndef PYTHON_PERF - if (zstd_init(&map->zstd_data, map->comp_level)) { + if (zstd_init(&map->zstd_data, mp->comp_level)) { pr_debug2("failed to init mmap compressor, error %d\n", errno); return -1; } #endif - if (map->comp_level && !perf_mmap__aio_enabled(map)) { + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index f944c3cd5efa..0df6e1621c7e 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -39,7 +39,6 @@ struct mmap { #endif struct mmap_cpu_mask affinity_mask; void *data; - int comp_level; struct perf_data_file *file; struct zstd_data zstd_data; }; diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index fd67d204d720..f7f7aff3d85a 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), BRANCH_END }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aa2f5c6fc7fc..66eabcea4242 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -976,7 +976,7 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_error *err) { if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -986,15 +986,23 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - if (perf_pmu__supports_legacy_cache(pmu)) { + /* + * Rewrite the PMU event to a legacy cache one unless the PMU + * doesn't support legacy cache events or the event is present + * within the PMU. + */ + if (perf_pmu__supports_legacy_cache(pmu) && + !perf_pmu__have_event(pmu, term->config)) { attr->type = PERF_TYPE_HW_CACHE; return parse_events__decode_legacy_cache(term->config, pmu->type, &attr->config); - } else + } else { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } } if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -1004,10 +1012,19 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + /* + * If the PMU has a sysfs or json event prefer it over + * legacy. ARM requires this. + */ + if (perf_pmu__have_event(pmu, term->config)) { + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } else { + attr->type = PERF_TYPE_HARDWARE; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1381,6 +1398,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, YYLTYPE *loc = loc_; LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; + bool alias_rewrote_terms = false; pmu = parse_state->fake_pmu ?: perf_pmus__find(name); @@ -1433,7 +1451,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return evsel ? 0 : -ENOMEM; } - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, &info, err)) { + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ + if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + parse_events_terms__exit(&parsed_terms); + return -EINVAL; + } + + /* Look for event names in the terms and rewrite into format based terms. */ + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, + &info, &alias_rewrote_terms, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1447,11 +1473,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, strbuf_release(&sb); } - /* - * Configure hardcoded terms first, no need to check - * return value when called with fail == 0 ;) - */ - if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + /* Configure attr/terms again if an alias was expanded. */ + if (alias_rewrote_terms && + config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index e1e2d701599c..1de3b69cdf4a 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) struct perf_cpu cpu; int ret, i = 0; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; cpu = perf_cpu_map__cpu(cpus, 0); @@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void) struct perf_cpu cpu; int fd; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 2247991451f3..8f04d3b7f3ec 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), + bit_name(COUNTERS), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d3c9aa4326be..3c9609944a2f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1494,12 +1494,14 @@ static int check_info_data(struct perf_pmu *pmu, * defined for the alias */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err) + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *rewrote_terms = false; info->per_pkg = false; /* @@ -1521,7 +1523,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } - + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; @@ -1615,6 +1617,8 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { + if (!name) + return false; if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d2895d415f08..424c3fee0949 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -201,7 +201,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err); + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1a5b7fa459b2..a1a796043691 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -149,10 +149,32 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, return 0; } +struct kernel_get_module_map_cb_args { + const char *module; + struct map *result; +}; + +static int kernel_get_module_map_cb(struct map *map, void *data) +{ + struct kernel_get_module_map_cb_args *args = data; + struct dso *dso = map__dso(map); + const char *short_name = dso->short_name; /* short_name is "[module]" */ + u16 short_name_len = dso->short_name_len; + + if (strncmp(short_name + 1, args->module, short_name_len - 2) == 0 && + args->module[short_name_len - 2] == '\0') { + args->result = map__get(map); + return 1; + } + return 0; +} + static struct map *kernel_get_module_map(const char *module) { - struct maps *maps = machine__kernel_maps(host_machine); - struct map_rb_node *pos; + struct kernel_get_module_map_cb_args args = { + .module = module, + .result = NULL, + }; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) @@ -164,19 +186,9 @@ static struct map *kernel_get_module_map(const char *module) return map__get(map); } - maps__for_each_entry(maps, pos) { - /* short_name is "[module]" */ - struct dso *dso = map__dso(pos->map); - const char *short_name = dso->short_name; - u16 short_name_len = dso->short_name_len; + maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args); - if (strncmp(short_name + 1, module, - short_name_len - 2) == 0 && - module[short_name_len - 2] == '\0') { - return map__get(pos->map); - } - } - return NULL; + return args.result; } struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f171360b0ef4..c8923375e30d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -23,6 +23,7 @@ #include "event.h" #include "dso.h" #include "debug.h" +#include "debuginfo.h" #include "intlist.h" #include "strbuf.h" #include "strlist.h" @@ -31,128 +32,9 @@ #include "probe-file.h" #include "string2.h" -#ifdef HAVE_DEBUGINFOD_SUPPORT -#include <elfutils/debuginfod.h> -#endif - /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -/* Dwarf FL wrappers */ -static char *debuginfo_path; /* Currently dummy */ - -static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, - .debuginfo_path = &debuginfo_path, - - .section_address = dwfl_offline_section_address, - - /* We use this table for core files too. */ - .find_elf = dwfl_build_id_find_elf, -}; - -/* Get a Dwarf from offline image */ -static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, - const char *path) -{ - GElf_Addr dummy; - int fd; - - fd = open(path, O_RDONLY); - if (fd < 0) - return fd; - - dbg->dwfl = dwfl_begin(&offline_callbacks); - if (!dbg->dwfl) - goto error; - - dwfl_report_begin(dbg->dwfl); - dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); - if (!dbg->mod) - goto error; - - dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); - if (!dbg->dbg) - goto error; - - dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); - - dwfl_report_end(dbg->dwfl, NULL, NULL); - - return 0; -error: - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - else - close(fd); - memset(dbg, 0, sizeof(*dbg)); - - return -ENOENT; -} - -static struct debuginfo *__debuginfo__new(const char *path) -{ - struct debuginfo *dbg = zalloc(sizeof(*dbg)); - if (!dbg) - return NULL; - - if (debuginfo__init_offline_dwarf(dbg, path) < 0) - zfree(&dbg); - if (dbg) - pr_debug("Open Debuginfo file: %s\n", path); - return dbg; -} - -enum dso_binary_type distro_dwarf_types[] = { - DSO_BINARY_TYPE__FEDORA_DEBUGINFO, - DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, - DSO_BINARY_TYPE__BUILDID_DEBUGINFO, - DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__NOT_FOUND, -}; - -struct debuginfo *debuginfo__new(const char *path) -{ - enum dso_binary_type *type; - char buf[PATH_MAX], nil = '\0'; - struct dso *dso; - struct debuginfo *dinfo = NULL; - struct build_id bid; - - /* Try to open distro debuginfo files */ - dso = dso__new(path); - if (!dso) - goto out; - - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) - dso__set_build_id(dso, &bid); - - for (type = distro_dwarf_types; - !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; - type++) { - if (dso__read_binary_type_filename(dso, *type, &nil, - buf, PATH_MAX) < 0) - continue; - dinfo = __debuginfo__new(buf); - } - dso__put(dso); - -out: - /* if failed to open all distro debuginfo, open given binary */ - return dinfo ? : __debuginfo__new(path); -} - -void debuginfo__delete(struct debuginfo *dbg) -{ - if (dbg) { - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - free(dbg); - } -} - /* * Probe finder related functions */ @@ -722,7 +604,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -733,7 +615,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1258,7 +1140,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->machine = ehdr.e_machine; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1268,7 +1150,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1677,44 +1559,6 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? ret : af.nvls; } -/* For the kernel module, we need a special code to get a DIE */ -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset) -{ - int n, i; - Elf32_Word shndx; - Elf_Scn *scn; - Elf *elf; - GElf_Shdr mem, *shdr; - const char *p; - - elf = dwfl_module_getelf(dbg->mod, &dbg->bias); - if (!elf) - return -EINVAL; - - /* Get the number of relocations */ - n = dwfl_module_relocations(dbg->mod); - if (n < 0) - return -ENOENT; - /* Search the relocation related .text section */ - for (i = 0; i < n; i++) { - p = dwfl_module_relocation_info(dbg->mod, i, &shndx); - if (strcmp(p, ".text") == 0) { - /* OK, get the section header */ - scn = elf_getscn(elf, shndx); - if (!scn) - return -ENOENT; - shdr = gelf_getshdr(scn, &mem); - if (!shdr) - return -ENOENT; - *offs = shdr->sh_addr; - if (adjust_offset) - *offs -= shdr->sh_offset; - } - } - return 0; -} - /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) @@ -2009,41 +1853,6 @@ found: return (ret < 0) ? ret : lf.found; } -#ifdef HAVE_DEBUGINFOD_SUPPORT -/* debuginfod doesn't require the comp_dir but buildid is required */ -static int get_source_from_debuginfod(const char *raw_path, - const char *sbuild_id, char **new_path) -{ - debuginfod_client *c = debuginfod_begin(); - const char *p = raw_path; - int fd; - - if (!c) - return -ENOMEM; - - fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, - 0, p, new_path); - pr_debug("Search %s from debuginfod -> %d\n", p, fd); - if (fd >= 0) - close(fd); - debuginfod_end(c); - if (fd < 0) { - pr_debug("Failed to find %s in debuginfod (%s)\n", - raw_path, sbuild_id); - return -ENOENT; - } - pr_debug("Got a source %s\n", *new_path); - - return 0; -} -#else -static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, - const char *sbuild_id __maybe_unused, - char **new_path __maybe_unused) -{ - return -ENOTSUP; -} -#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. Result is passed back as diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 8bc1c80d3c1c..3add5ff516e1 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -24,21 +24,7 @@ static inline int is_c_varname(const char *name) #ifdef HAVE_DWARF_SUPPORT #include "dwarf-aux.h" - -/* TODO: export debuginfo data structure even if no dwarf support */ - -/* debug information structure */ -struct debuginfo { - Dwarf *dbg; - Dwfl_Module *mod; - Dwfl *dwfl; - Dwarf_Addr bias; - const unsigned char *build_id; -}; - -/* This also tries to open distro debuginfo */ -struct debuginfo *debuginfo__new(const char *path); -void debuginfo__delete(struct debuginfo *dbg); +#include "debuginfo.h" /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, @@ -49,9 +35,6 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset); - /* Find a line range */ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 9eb5c6a08999..87e817b3cf7e 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -237,8 +237,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) evsel = evlist__last(temp_evlist); - if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); if (cpus) cpu = perf_cpu_map__cpu(cpus, 0); diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index f55ca07f3ca1..74b36644e384 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -12,6 +12,8 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ #define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ +#define PERF_EVENT_PAI_CRYPTO_ALL 0x1000 /* Event: CRYPTO_ALL */ +#define PERF_EVENT_PAI_NNPA_ALL 0x1800 /* Event: NNPA_ALL */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 115b16edb451..53383e97ec9d 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -51,8 +51,6 @@ static bool s390_cpumcfdg_testctr(struct perf_sample *sample) struct cf_trailer_entry *te; struct cf_ctrset_entry *cep, ce; - if (!len) - return false; while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); ce.def = be16_to_cpu(cep->def); @@ -125,6 +123,9 @@ static int get_counterset_start(int setnr) return 128; case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ return 448; + case PERF_EVENT_PAI_NNPA_ALL: /* PAI NNPA counter set */ + case PERF_EVENT_PAI_CRYPTO_ALL: /* PAI CRYPTO counter set */ + return setnr; default: return -1; } @@ -212,27 +213,120 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) } } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" +/* + * Check for consistency of PAI_CRYPTO/PAI_NNPA raw data. + */ +struct pai_data { /* Event number and value */ + u16 event_nr; + u64 event_val; +} __packed; + +#pragma GCC diagnostic pop + +/* + * Test for valid raw data. At least one PAI event should be in the raw + * data section. + */ +static bool s390_pai_all_test(struct perf_sample *sample) +{ + size_t len = sample->raw_size; + + if (len < 0xa) + return false; + return true; +} + +static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *p = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct pai_data pai_data; + char *ev_name; + + while (offset < len) { + memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr)); + pai_data.event_nr = be16_to_cpu(pai_data.event_nr); + p += sizeof(pai_data.event_nr); + offset += sizeof(pai_data.event_nr); + + memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val)); + pai_data.event_val = be64_to_cpu(pai_data.event_val); + p += sizeof(pai_data.event_val); + offset += sizeof(pai_data.event_val); + + ev_name = get_counter_name(evsel->core.attr.config, + pai_data.event_nr, evsel->pmu); + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + pai_data.event_nr, ev_name ?: "<unknown>", + pai_data.event_val); + free(ev_name); + + if (offset + 0xa > len) + break; + } + color_fprintf(stdout, color, "\n"); +} + /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events - * and if the event was triggered by a counter set diagnostic event display - * its raw data. + * and if the event was triggered by a + * - counter set diagnostic event + * - processor activity assist (PAI) crypto counter event + * - processor activity assist (PAI) neural network processor assist (NNPA) + * counter event + * display its raw data. * The function is only invoked when the dump flag -D is set. + * + * Function evlist__s390_sample_raw() is defined as call back after it has + * been verified that the perf.data file was created on s390 platform. */ -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) { + const char *pai_name; struct evsel *evsel; if (event->header.type != PERF_RECORD_SAMPLE) return; evsel = evlist__event2evsel(evlist, event); - if (evsel == NULL || - evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) + if (!evsel) + return; + + /* Check for raw data in sample */ + if (!sample->raw_size || !sample->raw_data) return; /* Display raw data on screen */ - if (!s390_cpumcfdg_testctr(sample)) { - pr_err("Invalid counter set data encountered\n"); + if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find("cpum_cf"); + if (!s390_cpumcfdg_testctr(sample)) + pr_err("Invalid counter set data encountered\n"); + else + s390_cpumcfdg_dump(evsel->pmu, sample); + return; + } + + switch (evsel->core.attr.config) { + case PERF_EVENT_PAI_NNPA_ALL: + pai_name = "NNPA_ALL"; + break; + case PERF_EVENT_PAI_CRYPTO_ALL: + pai_name = "CRYPTO_ALL"; + break; + default: return; } - s390_cpumcfdg_dump(evsel->pmu, sample); + + if (!s390_pai_all_test(sample)) { + pr_err("Invalid %s raw data encountered\n", pai_name); + } else { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type); + s390_pai_all_dump(evsel, sample); + } } diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index c92ad0f51ecd..70b2c3135555 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -113,6 +113,7 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + u64 *branch_stack_cntr; struct regs_dump user_regs; struct regs_dump intr_regs; struct stack_dump user_stack; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 603091317bed..b072ac5d3bc2 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -490,6 +490,9 @@ static int perl_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; command_line = malloc((argc + 2) * sizeof(const char *)); + if (!command_line) + return -ENOMEM; + command_line[0] = ""; command_line[1] = script; for (i = 2; i < argc + 2; i++) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 94312741443a..860e1837ba96 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -353,6 +353,8 @@ static PyObject *get_field_numeric_entry(struct tep_event *event, if (is_array) { list = PyList_New(field->arraylen); + if (!list) + Py_FatalError("couldn't create Python list"); item_size = field->size / field->arraylen; n_items = field->arraylen; } else { @@ -754,7 +756,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch } } -static void set_regs_in_dict(PyObject *dict, +static int set_regs_in_dict(PyObject *dict, struct perf_sample *sample, struct evsel *evsel) { @@ -770,6 +772,8 @@ static void set_regs_in_dict(PyObject *dict, */ int size = __sw_hweight64(attr->sample_regs_intr) * 28; char *bf = malloc(size); + if (!bf) + return -1; regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); @@ -781,6 +785,8 @@ static void set_regs_in_dict(PyObject *dict, pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); free(bf); + + return 0; } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, @@ -920,7 +926,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->cyc_cnt)); } - set_regs_in_dict(dict, sample, evsel); + if (set_regs_in_dict(dict, sample, evsel)) + Py_FatalError("Failed to setting regs in dict"); return dict; } @@ -1918,12 +1925,18 @@ static int python_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; #if PY_MAJOR_VERSION < 3 command_line = malloc((argc + 1) * sizeof(const char *)); + if (!command_line) + return -1; + command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; PyImport_AppendInittab(name, initperf_trace_context); #else command_line = malloc((argc + 1) * sizeof(wchar_t *)); + if (!command_line) + return -1; + command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1e9aa8ed15b6..199d3e8df315 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -115,6 +115,11 @@ static int perf_session__open(struct perf_session *session, int repipe_fd) return -1; } + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { + /* Auxiliary events may reference exited threads, hold onto dead ones. */ + symbol_conf.keep_exited_threads = true; + } + if (perf_data__is_pipe(data)) return 0; @@ -1150,9 +1155,13 @@ static void callchain__printf(struct evsel *evsel, i, callchain->ips[i]); } -static void branch_stack__printf(struct perf_sample *sample, bool callstack) +static void branch_stack__printf(struct perf_sample *sample, + struct evsel *evsel) { struct branch_entry *entries = perf_sample__branch_entries(sample); + bool callstack = evsel__has_branch_callstack(evsel); + u64 *branch_stack_cntr = sample->branch_stack_cntr; + struct perf_env *env = evsel__env(evsel); uint64_t i; if (!callstack) { @@ -1194,6 +1203,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } } + + if (branch_stack_cntr) { + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); + } } static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) @@ -1355,7 +1371,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, callchain__printf(evsel, sample); if (evsel__has_br_stack(evsel)) - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); + branch_stack__printf(sample, evsel); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample, arch); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80e4f6132740..30254eb63709 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -24,6 +24,7 @@ #include "strbuf.h" #include "mem-events.h" #include "annotate.h" +#include "annotate-data.h" #include "event.h" #include "time-utils.h" #include "cgroup.h" @@ -418,6 +419,52 @@ struct sort_entry sort_sym = { .se_width_idx = HISTC_SYMBOL, }; +/* --sort symoff */ + +static int64_t +sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_cmp(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int64_t +sort__symoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_sort(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int +hist_entry__symoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + struct symbol *sym = he->ms.sym; + + if (sym == NULL) + return repsep_snprintf(bf, size, "[%c] %-#.*llx", he->level, width - 4, he->ip); + + return repsep_snprintf(bf, size, "[%c] %s+0x%llx", he->level, sym->name, he->ip - sym->start); +} + +struct sort_entry sort_sym_offset = { + .se_header = "Symbol Offset", + .se_cmp = sort__symoff_cmp, + .se_sort = sort__symoff_sort, + .se_snprintf = hist_entry__symoff_snprintf, + .se_filter = hist_entry__sym_filter, + .se_width_idx = HISTC_SYMBOL_OFFSET, +}; + /* --sort srcline */ char *hist_entry__srcline(struct hist_entry *he) @@ -583,21 +630,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct annotation *notes; + struct annotated_branch *branch; double ipc = 0.0, coverage = 0.0; char tmp[64]; if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - notes = symbol__annotation(sym); + branch = symbol__annotation(sym)->branch; - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); @@ -2094,7 +2141,7 @@ struct sort_entry sort_dso_size = { .se_width_idx = HISTC_DSO_SIZE, }; -/* --sort dso_size */ +/* --sort addr */ static int64_t sort__addr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -2131,6 +2178,152 @@ struct sort_entry sort_addr = { .se_width_idx = HISTC_ADDR, }; +/* --sort type */ + +struct annotated_data_type unknown_type = { + .self = { + .type_name = (char *)"(unknown)", + .children = LIST_HEAD_INIT(unknown_type.self.children), + }, +}; + +static int64_t +sort__type_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return sort__addr_cmp(left, right); +} + +static void sort__type_init(struct hist_entry *he) +{ + if (he->mem_type) + return; + + he->mem_type = hist_entry__get_data_type(he); + if (he->mem_type == NULL) { + he->mem_type = &unknown_type; + he->mem_type_off = 0; + } +} + +static int64_t +sort__type_collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + return strcmp(left_type->self.type_name, right_type->self.type_name); +} + +static int64_t +sort__type_sort(struct hist_entry *left, struct hist_entry *right) +{ + return sort__type_collapse(left, right); +} + +static int hist_entry__type_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*s", width, he->mem_type->self.type_name); +} + +struct sort_entry sort_type = { + .se_header = "Data Type", + .se_cmp = sort__type_cmp, + .se_collapse = sort__type_collapse, + .se_sort = sort__type_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__type_snprintf, + .se_width_idx = HISTC_TYPE, +}; + +/* --sort typeoff */ + +static int64_t +sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + int64_t ret; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + ret = strcmp(left_type->self.type_name, right_type->self.type_name); + if (ret) + return ret; + return left->mem_type_off - right->mem_type_off; +} + +static void fill_member_name(char *buf, size_t sz, struct annotated_member *m, + int offset, bool first) +{ + struct annotated_member *child; + + if (list_empty(&m->children)) + return; + + list_for_each_entry(child, &m->children, node) { + if (child->offset <= offset && offset < child->offset + child->size) { + int len = 0; + + /* It can have anonymous struct/union members */ + if (child->var_name) { + len = scnprintf(buf, sz, "%s%s", + first ? "" : ".", child->var_name); + first = false; + } + + fill_member_name(buf + len, sz - len, child, offset, first); + return; + } + } +} + +static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width __maybe_unused) +{ + struct annotated_data_type *he_type = he->mem_type; + char buf[4096]; + + buf[0] = '\0'; + if (list_empty(&he_type->self.children)) + snprintf(buf, sizeof(buf), "no field"); + else + fill_member_name(buf, sizeof(buf), &he_type->self, + he->mem_type_off, true); + buf[4095] = '\0'; + + return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name, + he->mem_type_off, buf); +} + +struct sort_entry sort_type_offset = { + .se_header = "Data Type Offset", + .se_cmp = sort__type_cmp, + .se_collapse = sort__typeoff_sort, + .se_sort = sort__typeoff_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__typeoff_snprintf, + .se_width_idx = HISTC_TYPE_OFFSET, +}; + struct sort_dimension { const char *name; @@ -2185,7 +2378,10 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_ADDR, "addr", sort_addr), DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), - DIM(SORT_SIMD, "simd", sort_simd) + DIM(SORT_SIMD, "simd", sort_simd), + DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type), + DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), + DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), }; #undef DIM @@ -3205,6 +3401,8 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, list->thread = 1; } else if (sd->entry == &sort_comm) { list->comm = 1; + } else if (sd->entry == &sort_type_offset) { + symbol_conf.annotate_data_member = true; } return __sort_dimension__add(sd, list, level); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ecfb7f1359d5..6f6b4189a389 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -15,6 +15,7 @@ struct option; struct thread; +struct annotated_data_type; extern regex_t parent_regex; extern const char *sort_order; @@ -34,6 +35,7 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; +extern struct sort_entry sort_type; extern const char default_mem_sort_order[]; extern bool chk_double_cl; @@ -111,6 +113,7 @@ struct hist_entry { u64 p_stage_cyc; u8 cpumode; u8 depth; + int mem_type_off; struct simd_flags simd_flags; /* We are added by hists__add_dummy_entry. */ @@ -154,6 +157,7 @@ struct hist_entry { struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; struct hist_entry_ops *ops; + struct annotated_data_type *mem_type; union { /* this is for hierarchical entry structure */ struct { @@ -243,6 +247,9 @@ enum sort_type { SORT_LOCAL_RETIRE_LAT, SORT_GLOBAL_RETIRE_LAT, SORT_SIMD, + SORT_ANNOTATE_DATA_TYPE, + SORT_ANNOTATE_DATA_TYPE_OFFSET, + SORT_SYM_OFFSET, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index afe6db8e7bf4..8c61f8627ebc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -898,7 +898,7 @@ static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - if (config->no_merge || hybrid_uniquify(counter, config)) + if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) uniquify_event_name(counter); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 1c5c3eeba4cf..e31426167852 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -264,7 +264,7 @@ static void print_ll_miss(struct perf_stat_config *config, static const double color_ratios[3] = {20.0, 10.0, 5.0}; print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, - "of all L1-icache accesses"); + "of all LL-cache accesses"); } static void print_dtlb_miss(struct perf_stat_config *config, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ec3506042217..b0bcf92f0f9c 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!counter->per_pkg) return 0; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return 0; if (!mask) { @@ -592,7 +592,7 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev { struct evsel *evsel; - if (config->no_merge) + if (config->aggr_mode == AGGR_NONE) return; evlist__for_each_entry(evlist, evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 325d0fad1842..4357ba114822 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -76,7 +76,6 @@ struct perf_stat_config { bool null_run; bool ru_display; bool big_num; - bool no_merge; bool hybrid_merge; bool walltime_run_table; bool all_kernel; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9e7eeaf616b8..4b934ed3bfd1 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1392,8 +1392,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); map__set_end(map, map__start(map) + shdr->sh_size); map__set_pgoff(map, shdr->sh_offset); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); /* Ensure maps are correctly ordered */ if (kmaps) { int err; @@ -1455,8 +1454,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_end(curr_map, map__start(curr_map) + shdr->sh_size); map__set_pgoff(curr_map, shdr->sh_offset); } else { - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } curr_dso->symtab_type = dso->symtab_type; if (maps__insert(kmaps, curr_map)) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index a81a14769bd1..1da8b713509c 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -159,9 +159,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } else { Elf64_Ehdr ehdr; @@ -210,9 +211,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } out_free: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 82cc74b9358e..be212ba157dc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,11 +48,6 @@ static bool symbol__is_idle(const char *name); int vmlinux_path__nr_entries; char **vmlinux_path; -struct map_list_node { - struct list_head node; - struct map *map; -}; - struct symbol_conf symbol_conf = { .nanosecs = false, .use_modules = true, @@ -90,11 +85,6 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static struct map_list_node *map_list_node__new(void) -{ - return malloc(sizeof(struct map_list_node)); -} - static bool symbol_type__filter(char symbol_type) { symbol_type = toupper(symbol_type); @@ -270,29 +260,6 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) curr->end = roundup(curr->start, 4096) + 4096; } -void maps__fixup_end(struct maps *maps) -{ - struct map_rb_node *prev = NULL, *curr; - - down_write(maps__lock(maps)); - - maps__for_each_entry(maps, curr) { - if (prev != NULL && !map__end(prev->map)) - map__set_end(prev->map, map__start(curr->map)); - - prev = curr; - } - - /* - * We still haven't the actual symbols, so guess the - * last map final address. - */ - if (curr && !map__end(curr->map)) - map__set_end(curr->map, ~0ULL); - - up_write(maps__lock(maps)); -} - struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; @@ -956,8 +923,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; } - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); if (maps__insert(kmaps, curr_map)) { dso__put(ndso); return -1; @@ -1148,33 +1114,35 @@ out_delete_from: return ret; } +static int do_validate_kcore_modules_cb(struct map *old_map, void *data) +{ + struct rb_root *modules = data; + struct module_info *mi; + struct dso *dso; + + if (!__map__is_kmodule(old_map)) + return 0; + + dso = map__dso(old_map); + /* Module must be in memory at the same address */ + mi = find_module(dso->short_name, modules); + if (!mi || mi->start != map__start(old_map)) + return -EINVAL; + + return 0; +} + static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) { struct rb_root modules = RB_ROOT; - struct map_rb_node *old_node; int err; err = read_proc_modules(filename, &modules); if (err) return err; - maps__for_each_entry(kmaps, old_node) { - struct map *old_map = old_node->map; - struct module_info *mi; - struct dso *dso; + err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules); - if (!__map__is_kmodule(old_map)) { - continue; - } - dso = map__dso(old_map); - /* Module must be in memory at the same address */ - mi = find_module(dso->short_name, &modules); - if (!mi || mi->start != map__start(old_map)) { - err = -EINVAL; - goto out; - } - } -out: delete_modules(&modules); return err; } @@ -1271,101 +1239,15 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } -/* - * Merges map into maps by splitting the new map within the existing map - * regions. - */ -int maps__merge_in(struct maps *kmaps, struct map *new_map) +static bool remove_old_maps(struct map *map, void *data) { - struct map_rb_node *rb_node; - LIST_HEAD(merged); - int err = 0; - - maps__for_each_entry(kmaps, rb_node) { - struct map *old_map = rb_node->map; - - /* no overload with this one */ - if (map__end(new_map) < map__start(old_map) || - map__start(new_map) >= map__end(old_map)) - continue; - - if (map__start(new_map) < map__start(old_map)) { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new......| -> |new..| - * |old....| -> |old....| - */ - map__set_end(new_map, map__start(old_map)); - } else { - /* - * |new.............| -> |new..| |new..| - * |old....| -> |old....| - */ - struct map_list_node *m = map_list_node__new(); - - if (!m) { - err = -ENOMEM; - goto out; - } - - m->map = map__clone(new_map); - if (!m->map) { - free(m); - err = -ENOMEM; - goto out; - } - - map__set_end(m->map, map__start(old_map)); - list_add_tail(&m->node, &merged); - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } else { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new..| -> x - * |old.........| -> |old.........| - */ - map__put(new_map); - new_map = NULL; - break; - } else { - /* - * |new......| -> |new...| - * |old....| -> |old....| - */ - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } - } - -out: - while (!list_empty(&merged)) { - struct map_list_node *old_node; - - old_node = list_entry(merged.next, struct map_list_node, node); - list_del_init(&old_node->node); - if (!err) - err = maps__insert(kmaps, old_node->map); - map__put(old_node->map); - free(old_node); - } + const struct map *map_to_save = data; - if (new_map) { - if (!err) - err = maps__insert(kmaps, new_map); - map__put(new_map); - } - return err; + /* + * We need to preserve eBPF maps even if they are covered by kcore, + * because we need to access eBPF dso for source data. + */ + return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map); } static int dso__load_kcore(struct dso *dso, struct map *map, @@ -1374,7 +1256,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map, struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; struct map *replacement_map = NULL; - struct map_rb_node *old_node, *next; struct machine *machine; bool is_64_bit; int err, fd; @@ -1421,17 +1302,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - maps__for_each_entry_safe(kmaps, old_node, next) { - struct map *old_map = old_node->map; - - /* - * We need to preserve eBPF maps even if they are - * covered by kcore, because we need to access - * eBPF dso for source data. - */ - if (old_map != map && !__map__is_bpf_prog(old_map)) - maps__remove(kmaps, old_map); - } + maps__remove_maps(kmaps, remove_old_maps, map); machine->trampolines_mapped = false; /* Find the kernel map using the '_stext' symbol */ @@ -1475,8 +1346,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map__set_start(map, map__start(new_map)); map__set_end(map, map__end(new_map)); map__set_pgoff(map, map__pgoff(new_map)); - map__set_map_ip(map, map__map_ip_ptr(new_map)); - map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); + map__set_mapping_type(map, map__mapping_type(new_map)); /* Ensure maps are correctly ordered */ map_ref = map__get(map); maps__remove(kmaps, map_ref); @@ -2067,124 +1937,6 @@ out: return ret; } -static int map__strcmp(const void *a, const void *b) -{ - const struct map *map_a = *(const struct map **)a; - const struct map *map_b = *(const struct map **)b; - const struct dso *dso_a = map__dso(map_a); - const struct dso *dso_b = map__dso(map_b); - int ret = strcmp(dso_a->short_name, dso_b->short_name); - - if (ret == 0 && map_a != map_b) { - /* - * Ensure distinct but name equal maps have an order in part to - * aid reference counting. - */ - ret = (int)map__start(map_a) - (int)map__start(map_b); - if (ret == 0) - ret = (int)((intptr_t)map_a - (intptr_t)map_b); - } - - return ret; -} - -static int map__strcmp_name(const void *name, const void *b) -{ - const struct dso *dso = map__dso(*(const struct map **)b); - - return strcmp(name, dso->short_name); -} - -void __maps__sort_by_name(struct maps *maps) -{ - qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); -} - -static int map__groups__sort_by_name_from_rbtree(struct maps *maps) -{ - struct map_rb_node *rb_node; - struct map **maps_by_name = realloc(maps__maps_by_name(maps), - maps__nr_maps(maps) * sizeof(struct map *)); - int i = 0; - - if (maps_by_name == NULL) - return -1; - - up_read(maps__lock(maps)); - down_write(maps__lock(maps)); - - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; - RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); - - maps__for_each_entry(maps, rb_node) - maps_by_name[i++] = map__get(rb_node->map); - - __maps__sort_by_name(maps); - - up_write(maps__lock(maps)); - down_read(maps__lock(maps)); - - return 0; -} - -static struct map *__maps__find_by_name(struct maps *maps, const char *name) -{ - struct map **mapp; - - if (maps__maps_by_name(maps) == NULL && - map__groups__sort_by_name_from_rbtree(maps)) - return NULL; - - mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), - sizeof(*mapp), map__strcmp_name); - if (mapp) - return *mapp; - return NULL; -} - -struct map *maps__find_by_name(struct maps *maps, const char *name) -{ - struct map_rb_node *rb_node; - struct map *map; - - down_read(maps__lock(maps)); - - - if (RC_CHK_ACCESS(maps)->last_search_by_name) { - const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); - - if (strcmp(dso->short_name, name) == 0) { - map = RC_CHK_ACCESS(maps)->last_search_by_name; - goto out_unlock; - } - } - /* - * If we have maps->maps_by_name, then the name isn't in the rbtree, - * as maps->maps_by_name mirrors the rbtree when lookups by name are - * made. - */ - map = __maps__find_by_name(maps, name); - if (map || maps__maps_by_name(maps) != NULL) - goto out_unlock; - - /* Fallback to traversing the rbtree... */ - maps__for_each_entry(maps, rb_node) { - struct dso *dso; - - map = rb_node->map; - dso = map__dso(map); - if (strcmp(dso->short_name, name) == 0) { - RC_CHK_ACCESS(maps)->last_search_by_name = map; - goto out_unlock; - } - } - map = NULL; - -out_unlock: - up_read(maps__lock(maps)); - return map; -} - int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af87c46b3f89..071837ddce2a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -189,7 +189,6 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); -void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 0b589570d1d0..c114bbceef40 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -42,7 +42,11 @@ struct symbol_conf { inline_name, disable_add2line_warn, buildid_mmap2, - guest_code; + guest_code, + lazy_load_kernel_maps, + keep_exited_threads, + annotate_data_member, + annotate_data_sample; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a0579c7d7b9e..3712186353fb 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -665,18 +665,74 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, } #endif +struct perf_event__synthesize_modules_maps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) +{ + struct perf_event__synthesize_modules_maps_cb_args *args = data; + union perf_event *event = args->event; + struct dso *dso; + size_t size; + + if (!__map__is_kmodule(map)) + return 0; + + dso = map__dso(map); + if (symbol_conf.buildid_mmap2) { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, args->machine->id_hdr_size); + event->mmap2.header.size += args->machine->id_hdr_size; + event->mmap2.start = map__start(map); + event->mmap2.len = map__size(map); + event->mmap2.pid = args->machine->pid; + + memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); + } else { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, args->machine->id_hdr_size); + event->mmap.header.size += args->machine->id_hdr_size; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pid = args->machine->pid; + + memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); + } + + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; + + return 0; +} + int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { - int rc = 0; - struct map_rb_node *pos; + int rc; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + struct perf_event__synthesize_modules_maps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + }; + size_t size = symbol_conf.buildid_mmap2 + ? sizeof(args.event->mmap2) + : sizeof(args.event->mmap); - event = zalloc(size + machine->id_hdr_size); - if (event == NULL) { + args.event = zalloc(size + machine->id_hdr_size); + if (args.event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; @@ -687,53 +743,13 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t * __perf_event_mmap */ if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; + args.event->header.misc = PERF_RECORD_MISC_KERNEL; else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - maps__for_each_entry(maps, pos) { - struct map *map = pos->map; - struct dso *dso; + args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (!__map__is_kmodule(map)) - continue; + rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args); - dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap2.header.type = PERF_RECORD_MMAP2; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.start = map__start(map); - event->mmap2.len = map__size(map); - event->mmap2.pid = machine->pid; - - memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); - - perf_record_mmap2__read_build_id(&event->mmap2, machine, false); - } else { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); - } - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); + free(args.event); return rc; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fe5e6991ae4b..89c47a5098e2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -345,38 +345,36 @@ int thread__insert_map(struct thread *thread, struct map *map) if (ret) return ret; - maps__fixup_overlappings(thread__maps(thread), map, stderr); - return maps__insert(thread__maps(thread), map); + return maps__fixup_overlap_and_insert(thread__maps(thread), map); } -static int __thread__prepare_access(struct thread *thread) +struct thread__prepare_access_maps_cb_args { + int err; + struct maps *maps; +}; + +static int thread__prepare_access_maps_cb(struct map *map, void *data) { bool initialized = false; - int err = 0; - struct maps *maps = thread__maps(thread); - struct map_rb_node *rb_node; - - down_read(maps__lock(maps)); - - maps__for_each_entry(maps, rb_node) { - err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); - if (err || initialized) - break; - } + struct thread__prepare_access_maps_cb_args *args = data; - up_read(maps__lock(maps)); + args->err = unwind__prepare_access(args->maps, map, &initialized); - return err; + return (args->err || initialized) ? 1 : 0; } static int thread__prepare_access(struct thread *thread) { - int err = 0; + struct thread__prepare_access_maps_cb_args args = { + .err = 0, + }; - if (dwarf_callchain_users) - err = __thread__prepare_access(thread); + if (dwarf_callchain_users) { + args.maps = thread__maps(thread); + maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args); + } - return err; + return args.err; } static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) @@ -385,14 +383,14 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool if (thread__pid(thread) == thread__pid(parent)) return thread__prepare_access(thread); - if (thread__maps(thread) == thread__maps(parent)) { + if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", thread__pid(thread), thread__tid(thread), thread__pid(parent), thread__tid(parent)); return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; + return do_maps_clone ? maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e79225a0ea46..0df775b5c110 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -36,13 +36,22 @@ struct thread_rb_node { }; DECLARE_RC_STRUCT(thread) { + /** @maps: mmaps associated with this thread. */ struct maps *maps; pid_t pid_; /* Not all tools update this */ + /** @tid: thread ID number unique to a machine. */ pid_t tid; + /** @ppid: parent process of the process this thread belongs to. */ pid_t ppid; int cpu; int guest_cpu; /* For QEMU thread */ refcount_t refcnt; + /** + * @exited: Has the thread had an exit event. Such threads are usually + * removed from the machine's threads but some events/tools require + * access to dead threads. + */ + bool exited; bool comm_set; int comm_len; struct list_head namespaces_list; @@ -189,6 +198,11 @@ static inline refcount_t *thread__refcnt(struct thread *thread) return &RC_CHK_ACCESS(thread)->refcnt; } +static inline void thread__set_exited(struct thread *thread, bool exited) +{ + RC_CHK_ACCESS(thread)->exited = exited; +} + static inline bool thread__comm_set(const struct thread *thread) { return RC_CHK_ACCESS(thread)->comm_set; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index be7157de0451..4db3d1bd686c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) struct record_opts *opts = &top->record_opts; struct target *target = &opts->target; size_t ret = 0; + int nr_cpus; if (top->samples) { samples_per_sec = top->samples / top->delay_secs; @@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else ret += SNPRINTF(bf + ret, size - ret, " (all"); + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus); if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : "", + nr_cpus > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus), - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : ""); + nr_cpus, nr_cpus > 1 ? "s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a8b0d79bd96c..4c5588dbb131 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -21,7 +21,6 @@ struct perf_top { struct perf_tool tool; struct evlist *evlist, *sb_evlist; struct record_opts record_opts; - struct annotation_options annotation_opts; struct evswitch evswitch; /* * Symbols will be added here in perf_event__process_sample and will diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 8554db3fc0d7..6013335a8dae 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -46,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; + Dwarf_Addr base; /* * Some callers will use al->sym, so we can't just use the * cheaper thread__find_map() here. @@ -58,13 +59,25 @@ static int __report_module(struct addr_location *al, u64 ip, if (!dso) return 0; + /* + * The generated JIT DSO files only map the code segment without + * ELF headers. Since JIT codes used to be packed in a memory + * segment, calculating the base address using pgoff falls into + * a different code in another DSO. So just use the map->start + * directly to pick the correct one. + */ + if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) + base = map__start(al->map); + else + base = map__start(al->map) - map__pgoff(al->map); + mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != map__start(al->map) - map__pgoff(al->map)) - mod = 0; + if (s != base) + mod = NULL; } if (!mod) { @@ -72,14 +85,14 @@ static int __report_module(struct addr_location *al, u64 ip, __symbol__join_symfs(filename, sizeof(filename), dso->long_name); mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (!mod) { char filename[PATH_MAX]; if (dso__build_id_filename(dso, filename, sizeof(filename), false)) mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (mod) { diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index c0641882fd2f..dac536e28360 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -302,12 +302,31 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, return 0; } +struct read_unwind_spec_eh_frame_maps_cb_args { + struct dso *dso; + u64 base_addr; +}; + +static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data) +{ + + struct read_unwind_spec_eh_frame_maps_cb_args *args = data; + + if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr) + args->base_addr = map__start(map) - map__pgoff(map); + + return 0; +} + + static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, u64 *table_data, u64 *segbase, u64 *fde_count) { - struct map_rb_node *map_node; - u64 base_addr = UINT64_MAX; + struct read_unwind_spec_eh_frame_maps_cb_args args = { + .dso = dso, + .base_addr = UINT64_MAX, + }; int ret, fd; if (dso->data.eh_frame_hdr_offset == 0) { @@ -325,16 +344,11 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, return -EINVAL; } - maps__for_each_entry(thread__maps(ui->thread), map_node) { - struct map *map = map_node->map; - u64 start = map__start(map); + maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args); - if (map__dso(map) == dso && start < base_addr) - base_addr = start; - } - base_addr -= dso->data.elf_base_addr; + args.base_addr -= dso->data.elf_base_addr; /* Address of .eh_frame_hdr */ - *segbase = base_addr + dso->data.eh_frame_hdr_addr; + *segbase = args.base_addr + dso->data.eh_frame_hdr_addr; ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset, table_data, fde_count); if (ret) diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index ae3eee69b659..df8963796187 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -140,23 +140,34 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s return dso; } +struct machine__thread_dso_type_maps_cb_args { + struct machine *machine; + enum dso_type dso_type; +}; + +static int machine__thread_dso_type_maps_cb(struct map *map, void *data) +{ + struct machine__thread_dso_type_maps_cb_args *args = data; + struct dso *dso = map__dso(map); + + if (!dso || dso->long_name[0] != '/') + return 0; + + args->dso_type = dso__type(dso, args->machine); + return (args->dso_type != DSO__TYPE_UNKNOWN) ? 1 : 0; +} + static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { - enum dso_type dso_type = DSO__TYPE_UNKNOWN; - struct map_rb_node *rb_node; - - maps__for_each_entry(thread__maps(thread), rb_node) { - struct dso *dso = map__dso(rb_node->map); + struct machine__thread_dso_type_maps_cb_args args = { + .machine = machine, + .dso_type = DSO__TYPE_UNKNOWN, + }; - if (!dso || dso->long_name[0] != '/') - continue; - dso_type = dso__type(dso, machine); - if (dso_type != DSO__TYPE_UNKNOWN) - break; - } + maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args); - return dso_type; + return args.dso_type; } #if BITS_PER_LONG == 64 diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c index 48dd2b018c47..57027e0ac7b6 100644 --- a/tools/perf/util/zstd.c +++ b/tools/perf/util/zstd.c @@ -7,35 +7,9 @@ int zstd_init(struct zstd_data *data, int level) { - size_t ret; - - data->dstream = ZSTD_createDStream(); - if (data->dstream == NULL) { - pr_err("Couldn't create decompression stream.\n"); - return -1; - } - - ret = ZSTD_initDStream(data->dstream); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - - if (!level) - return 0; - - data->cstream = ZSTD_createCStream(); - if (data->cstream == NULL) { - pr_err("Couldn't create compression stream.\n"); - return -1; - } - - ret = ZSTD_initCStream(data->cstream, level); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - + data->comp_level = level; + data->dstream = NULL; + data->cstream = NULL; return 0; } @@ -54,7 +28,7 @@ int zstd_fini(struct zstd_data *data) return 0; } -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)) { @@ -63,6 +37,21 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t ZSTD_outBuffer output; void *record; + if (!data->cstream) { + data->cstream = ZSTD_createCStream(); + if (data->cstream == NULL) { + pr_err("Couldn't create compression stream.\n"); + return -1; + } + + ret = ZSTD_initCStream(data->cstream, data->comp_level); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize compression stream: %s\n", + ZSTD_getErrorName(ret)); + return -1; + } + } + while (input.pos < input.size) { record = dst; size = process_header(record, 0); @@ -96,6 +85,20 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size ZSTD_inBuffer input = { src, src_size, 0 }; ZSTD_outBuffer output = { dst, dst_size, 0 }; + if (!data->dstream) { + data->dstream = ZSTD_createDStream(); + if (data->dstream == NULL) { + pr_err("Couldn't create decompression stream.\n"); + return 0; + } + + ret = ZSTD_initDStream(data->dstream); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize decompression stream: %s\n", + ZSTD_getErrorName(ret)); + return 0; + } + } while (input.pos < input.size) { ret = ZSTD_decompressStream(data->dstream, &output, &input); if (ZSTD_isError(ret)) { |