diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:47:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:47:46 -0700 |
commit | 9d9af1007bc08971953ae915d88dc9bb21344b53 (patch) | |
tree | 02090da0b271c31f19d58d80f4cff19c8ef89971 /tools/perf/util/callchain.c | |
parent | a1e16bc7d5f7ca3599d8a7f061841c93a563665e (diff) | |
parent | 744aec4df2c5b4d12af26a57d8858af2f59ef3d0 (diff) | |
download | lwn-9d9af1007bc08971953ae915d88dc9bb21344b53.tar.gz lwn-9d9af1007bc08971953ae915d88dc9bb21344b53.zip |
Merge tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo:
- cgroup improvements for 'perf stat', allowing for compact
specification of events and cgroups in the command line.
- Support per thread topdown metrics in 'perf stat'.
- Support sample-read topdown metric group in 'perf record'
- Show start of latency in addition to its start in 'perf sched
latency'.
- Add min, max to 'perf script' futex-contention output, in addition to
avg.
- Allow usage of 'perf_event_attr->exclusive' attribute via the new
':e' event modifier.
- Add 'snapshot' command to 'perf record --control', using it with
Intel PT.
- Support FIFO file names as alternative options to 'perf record
--control'.
- Introduce branch history "streams", to compare 'perf record' runs
with 'perf diff' based on branch records and report hot streams.
- Support PE executable symbol tables using libbfd, to profile, for
instance, wine binaries.
- Add filter support for option 'perf ftrace -F/--funcs'.
- Allow configuring the 'disassembler_style' 'perf annotate' knob via
'perf config'
- Update CascadelakeX and SkylakeX JSON vendor events files.
- Add support for parsing perchip/percore JSON vendor events.
- Add power9 hv_24x7 core level metric events.
- Add L2 prefetch, ITLB instruction fetch hits JSON events for AMD
zen1.
- Enable Family 19h users by matching Zen2 AMD vendor events.
- Use debuginfod in 'perf probe' when required debug files not found
locally.
- Display negative tid in non-sample events in 'perf script'.
- Make GTK2 support opt-in
- Add build test with GTK+
- Add missing -lzstd to the fast path feature detection
- Add scripts to auto generate 'mmap', 'mremap' string<->id tables for
use in 'perf trace'.
- Show python test script in verbose mode.
- Fix uncore metric expressions
- Msan uninitialized use fixes.
- Use condition variables in 'perf bench numa'
- Autodetect python3 binary in systems without python2.
- Support md5 build ids in addition to sha1.
- Add build id 'perf test' regression test.
- Fix printable strings in python3 scripts.
- Fix off by ones in 'perf trace' in arches using libaudit.
- Fix JSON event code for events referencing std arch events.
- Introduce 'perf test' shell script for Arm CoreSight testing.
- Add rdtsc() for Arm64 for used in the PERF_RECORD_TIME_CONV metadata
event and in 'perf test tsc'.
- 'perf c2c' improvements: Add "RMT Load Hit" metric, "Total Stores",
fixes and documentation update.
- Fix usage of reloc_sym in 'perf probe' when using both kallsyms and
debuginfo files.
- Do not print 'Metric Groups:' unnecessarily in 'perf list'
- Refcounting fixes in the event parsing code.
- Add expand cgroup event 'perf test' entry.
- Fix out of bounds CPU map access when handling armv8_pmu events in
'perf stat'.
- Add build-id injection 'perf bench' benchmark.
- Enter namespace when reading build-id in 'perf inject'.
- Do not load map/dso when injecting build-id speeding up the 'perf
inject' process.
- Add --buildid-all option to avoid processing all samples, just the
mmap metadata events.
- Add feature test to check if libbfd has buildid support
- Add 'perf test' entry for PE binary format support.
- Fix typos in power8 PMU vendor events JSON files.
- Hide libtraceevent non API functions.
* tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (113 commits)
perf c2c: Update documentation for metrics reorganization
perf c2c: Add metrics "RMT Load Hit"
perf c2c: Correct LLC load hit metrics
perf c2c: Change header for LLC local hit
perf c2c: Use more explicit headers for HITM
perf c2c: Change header from "LLC Load Hitm" to "Load Hitm"
perf c2c: Organize metrics based on memory hierarchy
perf c2c: Display "Total Stores" as a standalone metrics
perf c2c: Display the total numbers continuously
perf bench: Use condition variables in numa.
perf jevents: Fix event code for events referencing std arch events
perf diff: Support hot streams comparison
perf streams: Report hot streams
perf streams: Calculate the sum of total streams hits
perf streams: Link stream pair
perf streams: Compare two streams
perf streams: Get the evsel_streams by evsel_idx
perf streams: Introduce branch history "streams"
perf intel-pt: Improve PT documentation slightly
perf tools: Add support for exclusive groups/events
...
Diffstat (limited to 'tools/perf/util/callchain.c')
-rw-r--r-- | tools/perf/util/callchain.c | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 2775b752f2fa..1b60985690bb 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1613,3 +1613,102 @@ void callchain_param_setup(u64 sample_type) callchain_param.record_mode = CALLCHAIN_FP; } } + +static bool chain_match(struct callchain_list *base_chain, + struct callchain_list *pair_chain) +{ + enum match_result match; + + match = match_chain_strings(base_chain->srcline, + pair_chain->srcline); + if (match != MATCH_ERROR) + return match == MATCH_EQ; + + match = match_chain_dso_addresses(base_chain->ms.map, + base_chain->ip, + pair_chain->ms.map, + pair_chain->ip); + + return match == MATCH_EQ; +} + +bool callchain_cnode_matched(struct callchain_node *base_cnode, + struct callchain_node *pair_cnode) +{ + struct callchain_list *base_chain, *pair_chain; + bool match = false; + + pair_chain = list_first_entry(&pair_cnode->val, + struct callchain_list, + list); + + list_for_each_entry(base_chain, &base_cnode->val, list) { + if (&pair_chain->list == &pair_cnode->val) + return false; + + if (!base_chain->srcline || !pair_chain->srcline) { + pair_chain = list_next_entry(pair_chain, list); + continue; + } + + match = chain_match(base_chain, pair_chain); + if (!match) + return false; + + pair_chain = list_next_entry(pair_chain, list); + } + + /* + * Say chain1 is ABC, chain2 is ABCD, we consider they are + * not fully matched. + */ + if (pair_chain && (&pair_chain->list != &pair_cnode->val)) + return false; + + return match; +} + +static u64 count_callchain_hits(struct hist_entry *he) +{ + struct rb_root *root = &he->sorted_chain; + struct rb_node *rb_node = rb_first(root); + struct callchain_node *node; + u64 chain_hits = 0; + + while (rb_node) { + node = rb_entry(rb_node, struct callchain_node, rb_node); + chain_hits += node->hit; + rb_node = rb_next(rb_node); + } + + return chain_hits; +} + +u64 callchain_total_hits(struct hists *hists) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + u64 chain_hits = 0; + + while (next) { + struct hist_entry *he = rb_entry(next, struct hist_entry, + rb_node); + + chain_hits += count_callchain_hits(he); + next = rb_next(&he->rb_node); + } + + return chain_hits; +} + +s64 callchain_avg_cycles(struct callchain_node *cnode) +{ + struct callchain_list *chain; + s64 cycles = 0; + + list_for_each_entry(chain, &cnode->val, list) { + if (chain->srcline && chain->branch_count) + cycles += chain->cycles_count / chain->branch_count; + } + + return cycles; +} |