summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-script.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
commit7b58b82b86c8b65a2b57a4c6cb96a460654f9e09 (patch)
treea13e19f216389f16f1cb6641d54751f167482515 /tools/perf/builtin-script.c
parent02f9a04d76b76b80b05ddc33ceabe806b84fda3c (diff)
parentab0809af0bee88b689ba289ec8c40aa2be3a17ec (diff)
downloadlwn-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.tar.gz
lwn-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.zip
Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: "New features: perf ftrace: - Add -n/--use-nsec option to the 'latency' subcommand. Default: usecs: $ sudo perf ftrace latency -T dput -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 2098375 | ############################# | 1 - 2 us | 61 | | 2 - 4 us | 33 | | 4 - 8 us | 13 | | 8 - 16 us | 124 | | 16 - 32 us | 123 | | 32 - 64 us | 1 | | 64 - 128 us | 0 | | 128 - 256 us | 1 | | 256 - 512 us | 0 | | Better granularity with nsec: $ sudo perf ftrace latency -T dput -a -n sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 1163434 | ############## | 128 - 256 ns | 914102 | ############# | 256 - 512 ns | 884 | | 512 - 1024 ns | 613 | | 1 - 2 us | 31 | | 2 - 4 us | 17 | | 4 - 8 us | 7 | | 8 - 16 us | 123 | | 16 - 32 us | 83 | | perf lock: - Add -c/--combine-locks option to merge lock instances in the same class into a single entry. # perf lock report -c Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns) rcu_read_lock 251225 0 0 0 0 0 hrtimer_bases.lock 39450 0 0 0 0 0 &sb->s_type->i_l... 10301 1 662 662 662 662 ptlock_ptr(page) 10173 2 701 1402 760 642 &(ei->i_block_re... 8732 0 0 0 0 0 &xa->xa_lock 8088 0 0 0 0 0 &base->lock 6705 0 0 0 0 0 &p->pi_lock 5549 0 0 0 0 0 &dentry->d_lockr... 5010 4 1274 5097 1844 789 &ep->lock 3958 0 0 0 0 0 - Add -F/--field option to customize the list of fields to output: $ perf lock report -F contended,wait_max -k avg_wait Name contended max wait(ns) avg wait(ns) slock-AF_INET6 1 23543 23543 &lruvec->lru_lock 5 18317 11254 slock-AF_INET6 1 10379 10379 rcu_node_1 1 2104 2104 &dentry->d_lockr... 1 1844 1844 &dentry->d_lockr... 1 1672 1672 &newf->file_lock 15 2279 1025 &dentry->d_lockr... 1 792 792 - Add --synth=no option for record, as there is no need to symbolize, lock names comes from the tracepoints. perf record: - Threaded recording, opt-in, via the new --threads command line option. - Improve AMD IBS (Instruction-Based Sampling) error handling messages. perf script: - Add 'brstackinsnlen' field (use it with -F) for branch stacks. - Output branch sample type in 'perf script'. perf report: - Add "addr_from" and "addr_to" sort dimensions. - Print branch stack entry type in 'perf report --dump-raw-trace' - Fix symbolization for chrooted workloads. Hardware tracing: Intel PT: - Add CFE (Control Flow Event) and EVD (Event Data) packets support. - Add MODE.Exec IFLAG bit support. Explanation about these features from the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 At page 3951: "32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. - Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ - VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked" ARM CoreSight: - Use advertised caps/min_interval as default sample_period on ARM spe. - Update deduction of TRCCONFIGR register for branch broadcast on ARM's CoreSight ETM. Vendor Events (JSON): Intel: - Update events and metrics for: Alderlake, Broadwell, Broadwell DE, BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont, GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX, Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP, Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX, Tigerlake, TremontX, Westmere EP-SP, and Westmere EX. ARM: - Add support for HiSilicon CPA PMU aliasing. perf stat: - Fix forked applications enablement of counters. - The 'slots' should only be printed on a different order than the one specified on the command line when 'topdown' events are present, fix it. Miscellaneous: - Sync msr-index, cpufeatures header files with the kernel sources. - Stop using some deprecated libbpf APIs in 'perf trace'. - Fix some spelling mistakes. - Refactor the maps pointers usage to pave the way for using refcount debugging. - Only offer the --tui option on perf top, report and annotate when perf was built with libslang. - Don't mention --to-ctf in 'perf data --help' when not linking with the required library, libbabeltrace. - Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by array_size.cocci. - Enhance the matching of sub-commands abbreviations: 'perf c2c rec' -> 'perf c2c record' 'perf c2c recport -> error - Set build-id using build-id header on new mmap records. - Fix generation of 'perf --version' string. perf test: - Add test for the arm_spe event. - Add test to check unwinding using fame-pointer (fp) mode on arm64. - Make metric testing more robust in 'perf test'. - Add error message for unsupported branch stack cases. libperf: - Add API for allocating new thread map array. - Fix typo in perf_evlist__open() failure error messages in libperf tests. perf c2c: - Replace bitmap_weight() with bitmap_empty() where appropriate" * tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits) perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages perf python: Add perf_env stubs that will be needed in evsel__open_strerror() perf tools: Enhance the matching of sub-commands abbreviations libperf tests: Fix typo in perf_evlist__open() failure error messages tools arm64: Import cputype.h perf lock: Add -F/--field option to control output perf lock: Extend struct lock_key to have print function perf lock: Add --synth=no option for record tools headers cpufeatures: Sync with the kernel sources tools headers cpufeatures: Sync with the kernel sources perf stat: Fix forked applications enablement of counters tools arch x86: Sync the msr-index.h copy with the kernel sources perf evsel: Make evsel__env() always return a valid env perf build-id: Fix spelling mistake "Cant" -> "Can't" perf header: Fix spelling mistake "could't" -> "couldn't" perf script: Add 'brstackinsnlen' for branch stacks perf parse-events: Move slots only with topdown perf ftrace latency: Update documentation perf ftrace latency: Add -n/--use-nsec option perf tools: Fix version kernel tag ...
Diffstat (limited to 'tools/perf/builtin-script.c')
-rw-r--r--tools/perf/builtin-script.c154
1 files changed, 117 insertions, 37 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index fa478ddcd18a..a2f117936188 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -124,6 +124,7 @@ enum perf_output_field {
PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33,
PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34,
PERF_OUTPUT_INS_LAT = 1ULL << 35,
+ PERF_OUTPUT_BRSTACKINSNLEN = 1ULL << 36,
};
struct perf_script {
@@ -191,6 +192,7 @@ struct output_option {
{.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
{.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE},
{.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT},
+ {.str = "brstackinsnlen", .field = PERF_OUTPUT_BRSTACKINSNLEN},
};
enum {
@@ -488,7 +490,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
"selected. Hence, no address to lookup the source line number.\n");
return -EINVAL;
}
- if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set &&
+ if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set &&
!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
"Hint: run 'perf record -b ...'\n");
@@ -857,6 +859,16 @@ mispred_str(struct branch_entry *br)
return br->flags.predicted ? 'P' : 'M';
}
+static int print_bstack_flags(FILE *fp, struct branch_entry *br)
+{
+ return fprintf(fp, "/%c/%c/%c/%d/%s ",
+ mispred_str(br),
+ br->flags.in_tx ? 'X' : '-',
+ br->flags.abort ? 'A' : '-',
+ br->flags.cycles,
+ br->flags.type ? branch_type_name(br->flags.type) : "-");
+}
+
static int perf_sample__fprintf_brstack(struct perf_sample *sample,
struct thread *thread,
struct perf_event_attr *attr, FILE *fp)
@@ -895,11 +907,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
- printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str(entries + i),
- entries[i].flags.in_tx ? 'X' : '-',
- entries[i].flags.abort ? 'A' : '-',
- entries[i].flags.cycles);
+ printed += print_bstack_flags(fp, entries + i);
}
return printed;
@@ -941,11 +949,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
printed += map__fprintf_dsoname(alt.map, fp);
printed += fprintf(fp, ")");
}
- printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str(entries + i),
- entries[i].flags.in_tx ? 'X' : '-',
- entries[i].flags.abort ? 'A' : '-',
- entries[i].flags.cycles);
+ printed += print_bstack_flags(fp, entries + i);
}
return printed;
@@ -991,11 +995,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
printed += map__fprintf_dsoname(alt.map, fp);
printed += fprintf(fp, ")");
}
- printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str(entries + i),
- entries[i].flags.in_tx ? 'X' : '-',
- entries[i].flags.abort ? 'A' : '-',
- entries[i].flags.cycles);
+ printed += print_bstack_flags(fp, entries + i);
}
return printed;
@@ -1122,10 +1122,17 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr)
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
- int insn, FILE *fp, int *total_cycles)
+ int insn, FILE *fp, int *total_cycles,
+ struct perf_event_attr *attr)
{
- int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
- dump_insn(x, ip, inbuf, len, NULL),
+ int ilen = 0;
+ int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
+ dump_insn(x, ip, inbuf, len, &ilen));
+
+ if (PRINT_FIELD(BRSTACKINSNLEN))
+ printed += fprintf(fp, "ilen: %d\t", ilen);
+
+ printed += fprintf(fp, "#%s%s%s%s",
en->flags.predicted ? " PRED" : "",
en->flags.mispred ? " MISPRED" : "",
en->flags.in_tx ? " INTX" : "",
@@ -1211,7 +1218,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
- &x, buffer, len, 0, fp, &total_cycles);
+ &x, buffer, len, 0, fp, &total_cycles,
+ attr);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
}
@@ -1242,14 +1250,17 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
- &total_cycles);
+ &total_cycles, attr);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
break;
} else {
ilen = 0;
- printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
+ printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
dump_insn(&x, ip, buffer + off, len - off, &ilen));
+ if (PRINT_FIELD(BRSTACKINSNLEN))
+ printed += fprintf(fp, "\tilen: %d", ilen);
+ printed += fprintf(fp, "\n");
if (ilen == 0)
break;
if (PRINT_FIELD(SRCCODE))
@@ -1292,16 +1303,23 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
machine, thread, &x.is64bit, &x.cpumode, false);
if (len <= 0)
goto out;
- printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
- dump_insn(&x, sample->ip, buffer, len, NULL));
+ ilen = 0;
+ printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
+ dump_insn(&x, sample->ip, buffer, len, &ilen));
+ if (PRINT_FIELD(BRSTACKINSNLEN))
+ printed += fprintf(fp, "\tilen: %d", ilen);
+ printed += fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE))
print_srccode(thread, x.cpumode, sample->ip);
goto out;
}
for (off = 0; off <= end - start; off += ilen) {
ilen = 0;
- printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
+ printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
+ if (PRINT_FIELD(BRSTACKINSNLEN))
+ printed += fprintf(fp, "\tilen: %d", ilen);
+ printed += fprintf(fp, "\n");
if (ilen == 0)
break;
if (arch_is_branch(buffer + off, len - off, x.is64bit) && start + off != sample->ip) {
@@ -1459,7 +1477,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
for (i = 0; i < sample->insn_len; i++)
printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]);
}
- if (PRINT_FIELD(BRSTACKINSN))
+ if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
return printed;
@@ -1579,26 +1597,34 @@ static const char *sample_flags_to_name(u32 flags)
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
{
+ u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE |
+ PERF_IP_FLAG_INTR_TOGGLE;
const char *chars = PERF_IP_FLAG_CHARS;
const size_t n = strlen(PERF_IP_FLAG_CHARS);
- bool in_tx = flags & PERF_IP_FLAG_IN_TX;
const char *name = NULL;
size_t i, pos = 0;
+ char xs[16] = {0};
- name = sample_flags_to_name(flags & ~PERF_IP_FLAG_IN_TX);
+ if (flags & xf)
+ snprintf(xs, sizeof(xs), "(%s%s%s)",
+ flags & PERF_IP_FLAG_IN_TX ? "x" : "",
+ flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
+ flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
+
+ name = sample_flags_to_name(flags & ~xf);
if (name)
- return snprintf(str, sz, "%-15s%4s", name, in_tx ? "(x)" : "");
+ return snprintf(str, sz, "%-15s%6s", name, xs);
if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
- name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_BEGIN));
+ name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN));
if (name)
- return snprintf(str, sz, "tr strt %-7s%4s", name, in_tx ? "(x)" : "");
+ return snprintf(str, sz, "tr strt %-7s%6s", name, xs);
}
if (flags & PERF_IP_FLAG_TRACE_END) {
- name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_END));
+ name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END));
if (name)
- return snprintf(str, sz, "tr end %-7s%4s", name, in_tx ? "(x)" : "");
+ return snprintf(str, sz, "tr end %-7s%6s", name, xs);
}
for (i = 0; i < n; i++, flags >>= 1) {
@@ -1620,7 +1646,7 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
char str[SAMPLE_FLAGS_BUF_SIZE];
perf_sample__sprintf_flags(flags, str, sizeof(str));
- return fprintf(fp, " %-19s ", str);
+ return fprintf(fp, " %-21s ", str);
}
struct printer_data {
@@ -1811,6 +1837,56 @@ static int perf_sample__fprintf_synth_psb(struct perf_sample *sample, FILE *fp)
return len + perf_sample__fprintf_pt_spacing(len, fp);
}
+/* Intel PT Event Trace */
+static int perf_sample__fprintf_synth_evt(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_evt *data = perf_sample__synth_ptr(sample);
+ const char *cfe[32] = {NULL, "INTR", "IRET", "SMI", "RSM", "SIPI",
+ "INIT", "VMENTRY", "VMEXIT", "VMEXIT_INTR",
+ "SHUTDOWN"};
+ const char *evd[64] = {"PFA", "VMXQ", "VMXR"};
+ const char *s;
+ int len, i;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ s = cfe[data->type];
+ if (s) {
+ len = fprintf(fp, " cfe: %s IP: %d vector: %u",
+ s, data->ip, data->vector);
+ } else {
+ len = fprintf(fp, " cfe: %u IP: %d vector: %u",
+ data->type, data->ip, data->vector);
+ }
+ for (i = 0; i < data->evd_cnt; i++) {
+ unsigned int et = data->evd[i].evd_type & 0x3f;
+
+ s = evd[et];
+ if (s) {
+ len += fprintf(fp, " %s: %#" PRIx64,
+ s, data->evd[i].payload);
+ } else {
+ len += fprintf(fp, " EVD_%u: %#" PRIx64,
+ et, data->evd[i].payload);
+ }
+ }
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth_iflag_chg(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_iflag_chg *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " IFLAG: %d->%d %s branch", !data->iflag, data->iflag,
+ data->via_branch ? "via" : "non");
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
static int perf_sample__fprintf_synth(struct perf_sample *sample,
struct evsel *evsel, FILE *fp)
{
@@ -1829,6 +1905,10 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return perf_sample__fprintf_synth_cbr(sample, fp);
case PERF_SYNTH_INTEL_PSB:
return perf_sample__fprintf_synth_psb(sample, fp);
+ case PERF_SYNTH_INTEL_EVT:
+ return perf_sample__fprintf_synth_evt(sample, fp);
+ case PERF_SYNTH_INTEL_IFLAG_CHG:
+ return perf_sample__fprintf_synth_iflag_chg(sample, fp);
default:
break;
}
@@ -3716,7 +3796,7 @@ int cmd_script(int argc, const char **argv)
"Valid types: hw,sw,trace,raw,synth. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
- "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
+ "brstacksym,flags,bpf-output,brstackinsn,brstackinsnlen,brstackoff,"
"callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
"data_page_size,code_page_size,ins_lat",
parse_output_fields),
@@ -3842,13 +3922,13 @@ int cmd_script(int argc, const char **argv)
if (symbol__validate_sym_arguments())
return -1;
- if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
+ if (argc > 1 && strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
if (!rec_script_path)
return cmd_record(argc, argv);
}
- if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) {
+ if (argc > 1 && strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
rep_script_path = get_script_path(argv[1], REPORT_SUFFIX);
if (!rep_script_path) {
fprintf(stderr,