diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-19 11:20:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-19 11:20:22 -0700 |
commit | 1ba3b5dc14923021f7815ea0b63baa34a3e1a998 (patch) | |
tree | a2381065b8131be82dc4def2c633ffb54ee9929c /tools/perf/util/stat-display.c | |
parent | a13f950ef13ff1eaf2ce14f5462ca59c4b60fdd0 (diff) | |
parent | 62e1c09418fc16d27720b128275cac61367e2c1b (diff) | |
download | lwn-1ba3b5dc14923021f7815ea0b63baa34a3e1a998.tar.gz lwn-1ba3b5dc14923021f7815ea0b63baa34a3e1a998.zip |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates from Ingo Molnar:
"perf.data:
- Streaming compression of perf ring buffer into
PERF_RECORD_COMPRESSED user space records, resulting in ~3-5x
perf.data file size reduction on variety of tested workloads what
saves storage space on larger server systems where perf.data size
can easily reach several tens or even hundreds of GiBs, especially
when profiling with DWARF-based stacks and tracing of context
switches.
perf record:
- Improve -user-regs/intr-regs suggestions to overcome errors
perf annotate:
- Remove hist__account_cycles() from callback, speeding up branch
processing (perf record -b)
perf stat:
- Add a 'percore' event qualifier, e.g.: -e
cpu/event=0,umask=0x3,percore=1/, that sums up the event counts for
both hardware threads in a core.
We can already do this with --per-core, but it's often useful to do
this together with other metrics that are collected per hardware
thread.
I.e. now its possible to do this per-event, and have it mixed with
other events not aggregated by core.
arm64:
- Map Brahma-B53 CPUID to cortex-a53 events.
- Add Cortex-A57 and Cortex-A72 events.
csky:
- Add DWARF register mappings for libdw, allowing --call-graph=dwarf
to work on the C-SKY arch.
x86:
- Add support for recording and printing XMM registers, available,
for instance, on Icelake.
- Add uncore_upi (Intel's "Ultra Path Interconnect" events) JSON
support. UPI replaced the Intel QuickPath Interconnect (QPI) in
Xeon Skylake-SP.
Intel PT:
- Fix instructions sampling rate.
- Timestamp fixes.
- Improve exported-sql-viewer GUI, allowing, for instance, to
copy'n'paste the trees, useful for e-mailing"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (73 commits)
perf stat: Support 'percore' event qualifier
perf stat: Factor out aggregate counts printing
perf tools: Add a 'percore' event qualifier
perf docs: Add description for stderr
perf intel-pt: Fix sample timestamp wrt non-taken branches
perf intel-pt: Fix improved sample timestamp
perf intel-pt: Fix instructions sampling rate
perf regs x86: Add X86 specific arch__intr_reg_mask()
perf parse-regs: Add generic support for arch__intr/user_reg_mask()
perf parse-regs: Split parse_regs
perf vendor events arm64: Add Cortex-A57 and Cortex-A72 events
perf vendor events arm64: Map Brahma-B53 CPUID to cortex-a53 events
perf vendor events arm64: Remove [[:xdigit:]] wildcard
perf jevents: Remove unused variable
perf test zstd: Fixup verbose mode output
perf tests: Implement Zstd comp/decomp integration test
perf inject: Enable COMPRESSED record decompression
perf report: Implement perf.data record decompression
perf record: Implement -z,--compression_level[=<n>] option
perf report: Add stub processing of compressed events for -D
...
Diffstat (limited to 'tools/perf/util/stat-display.c')
-rw-r--r-- | tools/perf/util/stat-display.c | 107 |
1 files changed, 78 insertions, 29 deletions
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 3324f23c7efc..4c53bae5644b 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -88,9 +88,17 @@ static void aggr_printout(struct perf_stat_config *config, config->csv_sep); break; case AGGR_NONE: - fprintf(config->output, "CPU%*d%s", - config->csv_output ? 0 : -4, - perf_evsel__cpus(evsel)->map[id], config->csv_sep); + if (evsel->percore) { + fprintf(config->output, "S%d-C%*d%s", + cpu_map__id_to_socket(id), + config->csv_output ? 0 : -5, + cpu_map__id_to_cpu(id), config->csv_sep); + } else { + fprintf(config->output, "CPU%*d%s ", + config->csv_output ? 0 : -5, + perf_evsel__cpus(evsel)->map[id], + config->csv_sep); + } break; case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", @@ -594,6 +602,41 @@ static void aggr_cb(struct perf_stat_config *config, } } +static void print_counter_aggrdata(struct perf_stat_config *config, + struct perf_evsel *counter, int s, + char *prefix, bool metric_only, + bool *first) +{ + struct aggr_data ad; + FILE *output = config->output; + u64 ena, run, val; + int id, nr; + double uval; + + ad.id = id = config->aggr_map->map[s]; + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(config, counter, aggr_cb, &ad)) + return; + + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; + if (*first && metric_only) { + *first = false; + aggr_printout(config, counter, id, nr); + } + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, id, nr, counter, uval, prefix, + run, ena, 1.0, &rt_stat); + if (!metric_only) + fputc('\n', output); +} + static void print_aggr(struct perf_stat_config *config, struct perf_evlist *evlist, char *prefix) @@ -601,9 +644,7 @@ static void print_aggr(struct perf_stat_config *config, bool metric_only = config->metric_only; FILE *output = config->output; struct perf_evsel *counter; - int s, id, nr; - double uval; - u64 ena, run, val; + int s; bool first; if (!(config->aggr_map || config->aggr_get_id)) @@ -616,33 +657,14 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - ad.id = id = config->aggr_map->map[s]; first = true; evlist__for_each_entry(evlist, counter) { - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(config, counter, aggr_cb, &ad)) - continue; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; - if (first && metric_only) { - first = false; - aggr_printout(config, counter, id, nr); - } - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - printout(config, id, nr, counter, uval, prefix, - run, ena, 1.0, &rt_stat); - if (!metric_only) - fputc('\n', output); + print_counter_aggrdata(config, counter, s, + prefix, metric_only, + &first); } if (metric_only) fputc('\n', output); @@ -1089,6 +1111,30 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. Try reorganizing the group.\n"); } +static void print_percore(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + int s; + bool first = true; + + if (!(config->aggr_map || config->aggr_get_id)) + return; + + for (s = 0; s < config->aggr_map->nr; s++) { + if (prefix && metric_only) + fprintf(output, "%s", prefix); + + print_counter_aggrdata(config, counter, s, + prefix, metric_only, + &first); + } + + if (metric_only) + fputc('\n', output); +} + void perf_evlist__print_counters(struct perf_evlist *evlist, struct perf_stat_config *config, @@ -1139,7 +1185,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist, print_no_aggr_metric(config, evlist, prefix); else { evlist__for_each_entry(evlist, counter) { - print_counter(config, counter, prefix); + if (counter->percore) + print_percore(config, counter, prefix); + else + print_counter(config, counter, prefix); } } break; |