From 25c2e5911705c69d7b01c35ea0b32cbe6ada67cd Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 15 Aug 2022 16:22:38 +0300 Subject: perf tools riscv: Add support for get_cpuid_str function The get_cpuid_str function returns the string that contains values of MVENDORID, MARCHID and MIMPID in hex format separated by coma. The values themselves are taken from first cpu entry in "/proc/cpuid" that contains "mvendorid", "marchid" and "mimpid". Signed-off-by: Nikita Shubin Tested-by: Kautuk Consul Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Alexander Shishkin Cc: Anup Patel Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Cc: linux@yadro.com Link: https://lore.kernel.org/r/20220815132251.25702-2-nikita.shubin@maquefel.me Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/riscv/util/Build | 1 + tools/perf/arch/riscv/util/header.c | 104 ++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 tools/perf/arch/riscv/util/header.c diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 7d3050134ae0..603dbb5ae4dc 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,4 +1,5 @@ perf-y += perf_regs.o +perf-y += header.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c new file mode 100644 index 000000000000..4a41856938a8 --- /dev/null +++ b/tools/perf/arch/riscv/util/header.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid(). + * + * Author: Nikita Shubin + */ + +#include +#include +#include +#include +#include "../../util/debug.h" +#include "../../util/header.h" + +#define CPUINFO_MVEN "mvendorid" +#define CPUINFO_MARCH "marchid" +#define CPUINFO_MIMP "mimpid" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + char *line = NULL; + char *mvendorid = NULL; + char *marchid = NULL; + char *mimpid = NULL; + char *cpuid = NULL; + int read; + unsigned long line_sz; + FILE *cpuinfo; + + cpuinfo = fopen(CPUINFO, "r"); + if (cpuinfo == NULL) + return cpuid; + + while ((read = getline(&line, &line_sz, cpuinfo)) != -1) { + if (!strncmp(line, CPUINFO_MVEN, strlen(CPUINFO_MVEN))) { + mvendorid = _get_field(line); + if (!mvendorid) + goto free; + } else if (!strncmp(line, CPUINFO_MARCH, strlen(CPUINFO_MARCH))) { + marchid = _get_field(line); + if (!marchid) + goto free; + } else if (!strncmp(line, CPUINFO_MIMP, strlen(CPUINFO_MIMP))) { + mimpid = _get_field(line); + if (!mimpid) + goto free; + + break; + } + } + + if (!mvendorid || !marchid || !mimpid) + goto free; + + if (asprintf(&cpuid, "%s-%s-%s", mvendorid, marchid, mimpid) < 0) + cpuid = NULL; + +free: + fclose(cpuinfo); + free(mvendorid); + free(marchid); + free(mimpid); + + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + char *cpuid = _get_cpuid(); + int ret = 0; + + if (sz < strlen(cpuid)) { + ret = -EINVAL; + goto free; + } + + scnprintf(buffer, sz, "%s", cpuid); +free: + free(cpuid); + return ret; +} + +char * +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} -- cgit v1.2.3 From 8f0dcb4e7364af3e5b156e6a3fdac0860733eb86 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 15 Aug 2022 16:22:39 +0300 Subject: perf arch events: riscv sbi firmware std event files Firmware events are defined by "RISC-V Supervisor Binary Interface Specification", which means they should be always available as long as firmware supports >= 0.3.0 SBI. Expose them to arch std events, so they can be reused by particular PMU bindings. Signed-off-by: Nikita Shubin Tested-by: Kautuk Consul Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Alexander Shishkin Cc: Anup Patel Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Cc: linux@yadro.com Link: https://lore.kernel.org/r/20220815132251.25702-3-nikita.shubin@maquefel.me Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/riscv/riscv-sbi-firmware.json | 134 +++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json new file mode 100644 index 000000000000..a9939823b14b --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -0,0 +1,134 @@ +[ + { + "PublicDescription": "Misaligned load trap", + "ConfigCode": "0x8000000000000000", + "EventName": "FW_MISALIGNED_LOAD", + "BriefDescription": "Misaligned load trap event" + }, + { + "PublicDescription": "Misaligned store trap", + "ConfigCode": "0x8000000000000001", + "EventName": "FW_MISALIGNED_STORE", + "BriefDescription": "Misaligned store trap event" + }, + { + "PublicDescription": "Load access trap", + "ConfigCode": "0x8000000000000002", + "EventName": "FW_ACCESS_LOAD", + "BriefDescription": "Load access trap event" + }, + { + "PublicDescription": "Store access trap", + "ConfigCode": "0x8000000000000003", + "EventName": "FW_ACCESS_STORE", + "BriefDescription": "Store access trap event" + }, + { + "PublicDescription": "Illegal instruction trap", + "ConfigCode": "0x8000000000000004", + "EventName": "FW_ILLEGAL_INSN", + "BriefDescription": "Illegal instruction trap event" + }, + { + "PublicDescription": "Set timer event", + "ConfigCode": "0x8000000000000005", + "EventName": "FW_SET_TIMER", + "BriefDescription": "Set timer event" + }, + { + "PublicDescription": "Sent IPI to other HART event", + "ConfigCode": "0x8000000000000006", + "EventName": "FW_IPI_SENT", + "BriefDescription": "Sent IPI to other HART event" + }, + { + "PublicDescription": "Received IPI from other HART event", + "ConfigCode": "0x8000000000000007", + "EventName": "FW_IPI_RECEIVED", + "BriefDescription": "Received IPI from other HART event" + }, + { + "PublicDescription": "Sent FENCE.I request to other HART event", + "ConfigCode": "0x8000000000000008", + "EventName": "FW_FENCE_I_SENT", + "BriefDescription": "Sent FENCE.I request to other HART event" + }, + { + "PublicDescription": "Received FENCE.I request from other HART event", + "ConfigCode": "0x8000000000000009", + "EventName": "FW_FENCE_I_RECEIVED", + "BriefDescription": "Received FENCE.I request from other HART event" + }, + { + "PublicDescription": "Sent SFENCE.VMA request to other HART event", + "ConfigCode": "0x800000000000000a", + "EventName": "FW_SFENCE_VMA_SENT", + "BriefDescription": "Sent SFENCE.VMA request to other HART event" + }, + { + "PublicDescription": "Received SFENCE.VMA request from other HART event", + "ConfigCode": "0x800000000000000b", + "EventName": "FW_SFENCE_VMA_RECEIVED", + "BriefDescription": "Received SFENCE.VMA request from other HART event" + }, + { + "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", + "ConfigCode": "0x800000000000000c", + "EventName": "FW_SFENCE_VMA_RECEIVED", + "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" + }, + { + "PublicDescription": "Received SFENCE.VMA with ASID request from other HART event", + "ConfigCode": "0x800000000000000d", + "EventName": "FW_SFENCE_VMA_ASID_RECEIVED", + "BriefDescription": "Received SFENCE.VMA with ASID request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.GVMA request to other HART event", + "ConfigCode": "0x800000000000000e", + "EventName": "FW_HFENCE_GVMA_SENT", + "BriefDescription": "Sent HFENCE.GVMA request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.GVMA request from other HART event", + "ConfigCode": "0x800000000000000f", + "EventName": "FW_HFENCE_GVMA_RECEIVED", + "BriefDescription": "Received HFENCE.GVMA request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.GVMA with VMID request to other HART event", + "ConfigCode": "0x8000000000000010", + "EventName": "FW_HFENCE_GVMA_VMID_SENT", + "BriefDescription": "Sent HFENCE.GVMA with VMID request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.GVMA with VMID request from other HART event", + "ConfigCode": "0x8000000000000011", + "EventName": "FW_HFENCE_GVMA_VMID_RECEIVED", + "BriefDescription": "Received HFENCE.GVMA with VMID request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.VVMA request to other HART event", + "ConfigCode": "0x8000000000000012", + "EventName": "FW_HFENCE_VVMA_SENT", + "BriefDescription": "Sent HFENCE.VVMA request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.VVMA request from other HART event", + "ConfigCode": "0x8000000000000013", + "EventName": "FW_HFENCE_VVMA_RECEIVED", + "BriefDescription": "Received HFENCE.VVMA request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.VVMA with ASID request to other HART event", + "ConfigCode": "0x8000000000000014", + "EventName": "FW_HFENCE_VVMA_ASID_SENT", + "BriefDescription": "Sent HFENCE.VVMA with ASID request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.VVMA with ASID request from other HART event", + "ConfigCode": "0x8000000000000015", + "EventName": "FW_HFENCE_VVMA_ASID_RECEIVED", + "BriefDescription": "Received HFENCE.VVMA with ASID request from other HART event" + } +] -- cgit v1.2.3 From c4f769d4093d114f3c374ba06d6eef1fb763b56c Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 15 Aug 2022 16:22:40 +0300 Subject: perf vendor events riscv: add Sifive U74 JSON file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch add the Sifive U74 JSON file. Link: https://sifive.cdn.prismic.io/sifive/ad5577a0-9a00-45c9-a5d0-424a3d586060_u74_core_complex_manual_21G3.pdf Derived-from-code-by: João Mário Domingos Signed-off-by: Nikita Shubin Tested-by: Kautuk Consul Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Alexander Shishkin Cc: Anup Patel Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Cc: linux@yadro.com Link: https://lore.kernel.org/r/20220815132251.25702-4-nikita.shubin@maquefel.me Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/riscv/mapfile.csv | 17 ++++ .../pmu-events/arch/riscv/sifive/u74/firmware.json | 68 ++++++++++++++++ .../arch/riscv/sifive/u74/instructions.json | 92 ++++++++++++++++++++++ .../pmu-events/arch/riscv/sifive/u74/memory.json | 32 ++++++++ .../arch/riscv/sifive/u74/microarch.json | 57 ++++++++++++++ 5 files changed, 266 insertions(+) create mode 100644 tools/perf/pmu-events/arch/riscv/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv new file mode 100644 index 000000000000..c61b3d6ef616 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -0,0 +1,17 @@ +# Format: +# MVENDORID-MARCHID-MIMPID,Version,JSON/file/pathname,Type +# +# where +# MVENDORID JEDEC code of the core provider +# MARCHID base microarchitecture of the hart +# MIMPID unique encoding of the version +# of the processor implementation +# Version could be used to track version of JSON file +# but currently unused. +# JSON/file/pathname is the path to JSON file, relative +# to tools/perf/pmu-events/arch/riscv/. +# Type is core, uncore etc +# +# +#MVENDORID-MARCHID-MIMPID,Version,Filename,EventType +0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json new file mode 100644 index 000000000000..5eab718c9256 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json @@ -0,0 +1,92 @@ +[ + { + "EventName": "EXCEPTION_TAKEN", + "EventCode": "0x0000100", + "BriefDescription": "Exception taken" + }, + { + "EventName": "INTEGER_LOAD_RETIRED", + "EventCode": "0x0000200", + "BriefDescription": "Integer load instruction retired" + }, + { + "EventName": "INTEGER_STORE_RETIRED", + "EventCode": "0x0000400", + "BriefDescription": "Integer store instruction retired" + }, + { + "EventName": "ATOMIC_MEMORY_RETIRED", + "EventCode": "0x0000800", + "BriefDescription": "Atomic memory operation retired" + }, + { + "EventName": "SYSTEM_INSTRUCTION_RETIRED", + "EventCode": "0x0001000", + "BriefDescription": "System instruction retired" + }, + { + "EventName": "INTEGER_ARITHMETIC_RETIRED", + "EventCode": "0x0002000", + "BriefDescription": "Integer arithmetic instruction retired" + }, + { + "EventName": "CONDITIONAL_BRANCH_RETIRED", + "EventCode": "0x0004000", + "BriefDescription": "Conditional branch retired" + }, + { + "EventName": "JAL_INSTRUCTION_RETIRED", + "EventCode": "0x0008000", + "BriefDescription": "JAL instruction retired" + }, + { + "EventName": "JALR_INSTRUCTION_RETIRED", + "EventCode": "0x0010000", + "BriefDescription": "JALR instruction retired" + }, + { + "EventName": "INTEGER_MULTIPLICATION_RETIRED", + "EventCode": "0x0020000", + "BriefDescription": "Integer multiplication instruction retired" + }, + { + "EventName": "INTEGER_DIVISION_RETIRED", + "EventCode": "0x0040000", + "BriefDescription": "Integer division instruction retired" + }, + { + "EventName": "FP_LOAD_RETIRED", + "EventCode": "0x0080000", + "BriefDescription": "Floating-point load instruction retired" + }, + { + "EventName": "FP_STORE_RETIRED", + "EventCode": "0x0100000", + "BriefDescription": "Floating-point store instruction retired" + }, + { + "EventName": "FP_ADDITION_RETIRED", + "EventCode": "0x0200000", + "BriefDescription": "Floating-point addition retired" + }, + { + "EventName": "FP_MULTIPLICATION_RETIRED", + "EventCode": "0x0400000", + "BriefDescription": "Floating-point multiplication retired" + }, + { + "EventName": "FP_FUSEDMADD_RETIRED", + "EventCode": "0x0800000", + "BriefDescription": "Floating-point fused multiply-add retired" + }, + { + "EventName": "FP_DIV_SQRT_RETIRED", + "EventCode": "0x1000000", + "BriefDescription": "Floating-point division or square-root retired" + }, + { + "EventName": "OTHER_FP_RETIRED", + "EventCode": "0x2000000", + "BriefDescription": "Other floating-point instruction retired" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json new file mode 100644 index 000000000000..be1a46312ac3 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json @@ -0,0 +1,32 @@ +[ + { + "EventName": "ICACHE_RETIRED", + "EventCode": "0x0000102", + "BriefDescription": "Instruction cache miss" + }, + { + "EventName": "DCACHE_MISS_MMIO_ACCESSES", + "EventCode": "0x0000202", + "BriefDescription": "Data cache miss or memory-mapped I/O access" + }, + { + "EventName": "DCACHE_WRITEBACK", + "EventCode": "0x0000402", + "BriefDescription": "Data cache write-back" + }, + { + "EventName": "INST_TLB_MISS", + "EventCode": "0x0000802", + "BriefDescription": "Instruction TLB miss" + }, + { + "EventName": "DATA_TLB_MISS", + "EventCode": "0x0001002", + "BriefDescription": "Data TLB miss" + }, + { + "EventName": "UTLB_MISS", + "EventCode": "0x0002002", + "BriefDescription": "UTLB miss" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json new file mode 100644 index 000000000000..50ffa55418cb --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json @@ -0,0 +1,57 @@ +[ + { + "EventName": "ADDRESSGEN_INTERLOCK", + "EventCode": "0x0000101", + "BriefDescription": "Address-generation interlock" + }, + { + "EventName": "LONGLAT_INTERLOCK", + "EventCode": "0x0000201", + "BriefDescription": "Long-latency interlock" + }, + { + "EventName": "CSR_READ_INTERLOCK", + "EventCode": "0x0000401", + "BriefDescription": "CSR read interlock" + }, + { + "EventName": "ICACHE_ITIM_BUSY", + "EventCode": "0x0000801", + "BriefDescription": "Instruction cache/ITIM busy" + }, + { + "EventName": "DCACHE_DTIM_BUSY", + "EventCode": "0x0001001", + "BriefDescription": "Data cache/DTIM busy" + }, + { + "EventName": "BRANCH_DIRECTION_MISPREDICTION", + "EventCode": "0x0002001", + "BriefDescription": "Branch direction misprediction" + }, + { + "EventName": "BRANCH_TARGET_MISPREDICTION", + "EventCode": "0x0004001", + "BriefDescription": "Branch/jump target misprediction" + }, + { + "EventName": "PIPE_FLUSH_CSR_WRITE", + "EventCode": "0x0008001", + "BriefDescription": "Pipeline flush from CSR write" + }, + { + "EventName": "PIPE_FLUSH_OTHER_EVENT", + "EventCode": "0x0010001", + "BriefDescription": "Pipeline flush from other event" + }, + { + "EventName": "INTEGER_MULTIPLICATION_INTERLOCK", + "EventCode": "0x0020001", + "BriefDescription": "Integer multiplication interlock" + }, + { + "EventName": "FP_INTERLOCK", + "EventCode": "0x0040001", + "BriefDescription": "Floating-point interlock" + } +] \ No newline at end of file -- cgit v1.2.3 From f7400262ea2192fc79b6f5a68242b3495b016981 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:08 -0700 Subject: perf tools: Save evsel->pmu in parse_events() Now evsel has a pmu pointer, let's save the info and use it like in evsel__find_pmu(). The missing feature check needs to be changed as the pmu pointer can be set from the beginning. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 20 ++++++++++---------- tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 4 ++++ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde3507..b7140beca970 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -467,6 +467,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; + evsel->pmu = orig->pmu; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; @@ -1966,17 +1967,16 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && - (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { - if (evsel->pmu == NULL) { + } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { + if (evsel->pmu == NULL) evsel->pmu = evsel__find_pmu(evsel); - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } + + if (evsel->pmu) + evsel->pmu->missing_features.exclude_guest = true; + else { + /* we cannot find PMU, disable attrs now */ + evsel->core.attr.exclude_host = false; + evsel->core.attr.exclude_guest = false; } if (evsel->exclude_GH) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5973f46c2375..6502cd679f57 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -266,6 +266,7 @@ __add_event(struct list_head *list, int *idx, evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; + evsel->pmu = pmu; if (name) evsel->name = strdup(name); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 03284059175f..6a86e6af0903 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1065,11 +1065,15 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel) { struct perf_pmu *pmu = NULL; + if (evsel->pmu) + return evsel->pmu; + while ((pmu = perf_pmu__scan(pmu)) != NULL) { if (pmu->type == evsel->core.attr.type) break; } + evsel->pmu = pmu; return pmu; } -- cgit v1.2.3 From b86ac6796b6c1dea83b744812d36922c21f43323 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:09 -0700 Subject: perf tools: Use pmu info in evsel__is_hybrid() If evsel has pmu, it can use pmu->is_hybrid directly. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b7140beca970..c9fef26d0702 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3131,6 +3131,9 @@ void evsel__zero_per_pkg(struct evsel *evsel) bool evsel__is_hybrid(struct evsel *evsel) { + if (evsel->pmu) + return evsel->pmu->is_hybrid; + return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); } -- cgit v1.2.3 From 93d5e700156e03e66eb1bf2158ba3b8a8b354c71 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:10 -0700 Subject: perf stat: Use evsel__is_hybrid() more In the stat-display code, it needs to check if the current evsel is hybrid but it uses perf_pmu__has_hybrid() which can return true for non-hybrid event too. I think it's better to use evsel__is_hybrid(). Also remove a NULL check for the 'config' parameter in the hybrid_merge() since it's called after config->no_merge check. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5c47ee9963a7..4113aa86772f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -704,7 +704,7 @@ static void uniquify_event_name(struct evsel *counter) counter->name = new_name; } } else { - if (perf_pmu__has_hybrid()) { + if (evsel__is_hybrid(counter)) { ret = asprintf(&new_name, "%s/%s/", counter->pmu_name, counter->name); } else { @@ -744,26 +744,14 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c } } -static bool is_uncore(struct evsel *evsel) -{ - struct perf_pmu *pmu = evsel__find_pmu(evsel); - - return pmu && pmu->is_uncore; -} - -static bool hybrid_uniquify(struct evsel *evsel) -{ - return perf_pmu__has_hybrid() && !is_uncore(evsel); -} - static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, bool check) { - if (hybrid_uniquify(counter)) { + if (evsel__is_hybrid(counter)) { if (check) - return config && config->hybrid_merge; + return config->hybrid_merge; else - return config && !config->hybrid_merge; + return !config->hybrid_merge; } return false; -- cgit v1.2.3 From 375369abcdb774abadbc1c82d2e6c24f0f1f49a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:11 -0700 Subject: perf stat: Add aggr id for global mode To make the code simpler, I'd like to use the same aggregation code for the global mode. We can simply add an id function to return cpu 0 and use print_aggr(). No functional change intended. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 36 ++++++++++++++++++++++++++++++++++-- tools/perf/util/cpumap.c | 10 ++++++++++ tools/perf/util/cpumap.h | 6 +++++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 265b05157972..75d16e9705a4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1330,6 +1330,12 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __ return aggr_cpu_id__node(cpu, /*data=*/NULL); } +static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__global(cpu, /*data=*/NULL); +} + static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { @@ -1366,6 +1372,12 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } +static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config, + struct perf_cpu cpu) +{ + return perf_stat__get_aggr(config, perf_stat__get_global, cpu); +} + static bool term_percore_set(void) { struct evsel *counter; @@ -1395,6 +1407,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) return NULL; case AGGR_GLOBAL: + return aggr_cpu_id__global; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1420,6 +1433,7 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) } return NULL; case AGGR_GLOBAL: + return perf_stat__get_global_cached; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1535,6 +1549,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused, + void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + id.cpu = (struct perf_cpu){ .cpu = 0 }; + return id; +} + static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { @@ -1558,6 +1582,12 @@ static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *conf return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { switch (aggr_mode) { @@ -1569,8 +1599,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) return perf_env__get_core_aggr_by_cpu; case AGGR_NODE: return perf_env__get_node_aggr_by_cpu; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_env__get_global_aggr_by_cpu; + case AGGR_NONE: case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1590,8 +1621,9 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) return perf_stat__get_core_file; case AGGR_NODE: return perf_stat__get_node_file; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_stat__get_global_file; + case AGGR_NONE: case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8486ca3bec75..60209fe87456 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -354,6 +354,16 @@ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unu return id; } +struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + cpu.cpu = 0; + id.cpu = cpu; + return id; +} + /* setup simple routines to easily access node numbers given a cpu number */ static int get_max_num(char *path, int *max) { diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 4a6d029576ee..b2ff648bc417 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -133,5 +133,9 @@ struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); * cpu. The function signature is compatible with aggr_cpu_id_get_t. */ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); - +/** + * aggr_cpu_id__global - Create an aggr_cpu_id for global aggregation. + * The function signature is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data); #endif /* __PERF_CPUMAP_H */ -- cgit v1.2.3 From 8938cfa7480282fb3c75548958b239444affcc50 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:12 -0700 Subject: perf stat: Add cpu aggr id for no aggregation mode Likewise, add an aggr_id for cpu for none aggregation mode. This is not used actually yet but later code will use to unify the aggregation code. No functional change intended. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 48 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 75d16e9705a4..b03b530fe9a6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1336,6 +1336,12 @@ static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config return aggr_cpu_id__global(cpu, /*data=*/NULL); } +static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__cpu(cpu, /*data=*/NULL); +} + static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { @@ -1378,6 +1384,12 @@ static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config * return perf_stat__get_aggr(config, perf_stat__get_global, cpu); } +static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config, + struct perf_cpu cpu) +{ + return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); +} + static bool term_percore_set(void) { struct evsel *counter; @@ -1404,8 +1416,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) case AGGR_NONE: if (term_percore_set()) return aggr_cpu_id__core; - - return NULL; + return aggr_cpu_id__cpu; case AGGR_GLOBAL: return aggr_cpu_id__global; case AGGR_THREAD: @@ -1428,10 +1439,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: - if (term_percore_set()) { + if (term_percore_set()) return perf_stat__get_core_cached; - } - return NULL; + return perf_stat__get_cpu_cached; case AGGR_GLOBAL: return perf_stat__get_global_cached; case AGGR_THREAD: @@ -1541,6 +1551,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data) +{ + struct perf_env *env = data; + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + if (cpu.cpu != -1) { + /* + * core_id is relative to socket and die, + * we need a global id. So we set + * socket, die id and core id + */ + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; + id.cpu = cpu; + } + + return id; +} + static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -1576,6 +1606,12 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { @@ -1602,6 +1638,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) case AGGR_GLOBAL: return perf_env__get_global_aggr_by_cpu; case AGGR_NONE: + return perf_env__get_cpu_aggr_by_cpu; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1624,6 +1661,7 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) case AGGR_GLOBAL: return perf_stat__get_global_file; case AGGR_NONE: + return perf_stat__get_cpu_file; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: -- cgit v1.2.3 From 505ac48ba759cd88d9fd40ec5354cda28e17377b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:13 -0700 Subject: perf stat: Add 'needs_sort' argument to cpu_aggr_map__new() In case of no aggregation, it needs to keep the original (cpu) ordering in the aggr_map so that it can be in sync with the cpu map. This will make the code easier to handle AGGR_NONE similar to others. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 +++++-- tools/perf/util/cpumap.c | 6 ++++-- tools/perf/util/cpumap.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b03b530fe9a6..9053fd4d15a7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1458,8 +1458,9 @@ static int perf_stat_init_aggr_mode(void) aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); if (get_id) { + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, - get_id, /*data=*/NULL); + get_id, /*data=*/NULL, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; @@ -1674,11 +1675,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { struct perf_env *env = &st->session->header.env; aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; if (!get_id) return 0; - stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env); + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, + get_id, env, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 60209fe87456..6e3fcf523de9 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -234,7 +234,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, - void *data) + void *data, bool needs_sort) { int idx; struct perf_cpu cpu; @@ -270,8 +270,10 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, if (trimmed_c) c = trimmed_c; } + /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); + if (needs_sort) + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); return c; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b2ff648bc417..da28b3146ef9 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -97,7 +97,7 @@ typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data) */ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, - void *data); + void *data, bool needs_sort); bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); -- cgit v1.2.3 From ca68b374d0409bea5cacd4c5a8e0fbb407922d38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:14 -0700 Subject: perf stat: Add struct perf_stat_aggr to perf_stat_evsel The perf_stat_aggr struct is to keep aggregated counter values and the states according to the aggregation mode. The number of entries is depends on the mode and this is a preparation for the later use. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 34 +++++++++++++++++++++++++++------- tools/perf/util/stat.h | 19 +++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8ec8bb4a9912..c9d5aa295b54 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -133,15 +133,33 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) static void evsel__reset_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; + struct perf_stat_aggr *aggr = ps->aggr; init_stats(&ps->res_stats); + + if (aggr) + memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } -static int evsel__alloc_stat_priv(struct evsel *evsel) + +static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) { - evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); - if (evsel->stats == NULL) + struct perf_stat_evsel *ps; + + ps = zalloc(sizeof(*ps)); + if (ps == NULL) return -ENOMEM; + + if (nr_aggr) { + ps->nr_aggr = nr_aggr; + ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); + if (ps->aggr == NULL) { + free(ps); + return -ENOMEM; + } + } + + evsel->stats = ps; perf_stat_evsel_id_init(evsel); evsel__reset_stat_priv(evsel); return 0; @@ -151,8 +169,10 @@ static void evsel__free_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; - if (ps) + if (ps) { + zfree(&ps->aggr); zfree(&ps->group_data); + } zfree(&evsel->stats); } @@ -181,9 +201,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) perf_counts__reset(evsel->prev_raw_counts); } -static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) +static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw) { - if (evsel__alloc_stat_priv(evsel) < 0 || + if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 || evsel__alloc_counts(evsel) < 0 || (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0)) return -ENOMEM; @@ -196,7 +216,7 @@ int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel__alloc_stats(evsel, alloc_raw)) + if (evsel__alloc_stats(evsel, 0, alloc_raw)) goto out_free; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index b0899c6e002f..42453513ffea 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -8,6 +8,7 @@ #include #include "cpumap.h" #include "rblist.h" +#include "counts.h" struct perf_cpu_map; struct perf_stat_config; @@ -42,9 +43,27 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__MAX, }; +/* hold aggregated event info */ +struct perf_stat_aggr { + /* aggregated values */ + struct perf_counts_values counts; + /* number of entries (CPUs) aggregated */ + int nr; + /* whether any entry has failed to read/process event */ + bool failed; +}; + +/* per-evsel event stats */ struct perf_stat_evsel { + /* used for repeated runs */ struct stats res_stats; + /* evsel id for quick check */ enum perf_stat_evsel_id id; + /* number of allocated 'aggr' */ + int nr_aggr; + /* aggregated event values */ + struct perf_stat_aggr *aggr; + /* used for group read */ u64 *group_data; }; -- cgit v1.2.3 From 1f297a6eb2bd90663518cbb6e9e2a3b2add34b73 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:15 -0700 Subject: perf stat: Allocate evsel->stats->aggr properly The perf_stat_config.aggr_map should have a correct size of the aggregation map. Use it to allocate aggr_counts. Also AGGR_NONE with per-core events can be tricky because it doesn't aggreate basically but it needs to do so for per-core events only. So only per-core evsels will have stats->aggr data. Note that other caller of evlist__alloc_stat() might not have stat_config or aggr_map. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++-- tools/perf/builtin-stat.c | 6 +++--- tools/perf/tests/parse-metric.c | 2 +- tools/perf/tests/pmu-events.c | 2 +- tools/perf/util/stat.c | 9 +++++++-- tools/perf/util/stat.h | 3 ++- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7ca238277d83..d7ec8c1af293 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2049,7 +2049,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, u64 val; if (!evsel->stats) - evlist__alloc_stats(script->session->evlist, false); + evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false); if (evsel_script(leader)->gnum++ == 0) perf_stat__reset_shadow_stats(); val = sample->period * evsel->scale; @@ -3632,7 +3632,7 @@ static int set_maps(struct perf_script *script) perf_evlist__set_maps(&evlist->core, script->cpus, script->threads); - if (evlist__alloc_stats(evlist, true)) + if (evlist__alloc_stats(&stat_config, evlist, /*alloc_raw=*/true)) return -ENOMEM; script->allocated = true; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9053fd4d15a7..92a8e4512f98 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2121,7 +2121,7 @@ static int set_maps(struct perf_stat *st) perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); - if (evlist__alloc_stats(evsel_list, true)) + if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true)) return -ENOMEM; st->maps_allocated = true; @@ -2568,10 +2568,10 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (evlist__alloc_stats(evsel_list, interval)) + if (perf_stat_init_aggr_mode()) goto out; - if (perf_stat_init_aggr_mode()) + if (evlist__alloc_stats(&stat_config, evsel_list, interval)) goto out; /* diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 68f5a2a03242..21b7ac00d798 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -103,7 +103,7 @@ static int __compute_metric(const char *name, struct value *vals, if (err) goto out; - err = evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false); if (err) goto out; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 097e05c796ab..5d0d3b239a68 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -889,7 +889,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e goto out_err; } - err = evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false); if (err) goto out_err; /* diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c9d5aa295b54..374149628507 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -211,12 +211,17 @@ static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw) return 0; } -int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) +int evlist__alloc_stats(struct perf_stat_config *config, + struct evlist *evlist, bool alloc_raw) { struct evsel *evsel; + int nr_aggr = 0; + + if (config && config->aggr_map) + nr_aggr = config->aggr_map->nr; evlist__for_each_entry(evlist, evsel) { - if (evsel__alloc_stats(evsel, 0, alloc_raw)) + if (evsel__alloc_stats(evsel, nr_aggr, alloc_raw)) goto out_free; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 42453513ffea..0980875b9be1 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -267,7 +267,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct runtime_stat *st); void perf_stat__collect_metric_expr(struct evlist *); -int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); +int evlist__alloc_stats(struct perf_stat_config *config, + struct evlist *evlist, bool alloc_raw); void evlist__free_stats(struct evlist *evlist); void evlist__reset_stats(struct evlist *evlist); void evlist__reset_prev_raw_counts(struct evlist *evlist); -- cgit v1.2.3 From f976bc6b6bfc9b14eeaf9a8859191c8f85c253dc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:16 -0700 Subject: perf stat: Aggregate events using evsel->stats->aggr Add a logic to aggregate counter values to the new evsel->stats->aggr. This is not used yet so shadow stats are not updated. But later patch will convert the existing code to use it. With that, we don't need to handle AGGR_GLOBAL specially anymore. It can use the same logic with counts, prev_counts and aggr_counts. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 -- tools/perf/util/evsel.c | 9 +---- .../util/scripting-engines/trace-event-python.c | 6 --- tools/perf/util/stat.c | 46 ++++++++++++++++++---- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 92a8e4512f98..abede56d79b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -963,9 +963,6 @@ try_again_reset: init_stats(&walltime_nsecs_stats); update_stats(&walltime_nsecs_stats, t1 - t0); - if (stat_config.aggr_mode == AGGR_GLOBAL) - evlist__save_aggr_prev_raw_counts(evsel_list); - evlist__copy_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list); perf_stat__reset_shadow_per_stat(&rt_stat); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c9fef26d0702..cdde5b5f8ad2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1526,13 +1526,8 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, if (!evsel->prev_raw_counts) return; - if (cpu_map_idx == -1) { - tmp = evsel->prev_raw_counts->aggr; - evsel->prev_raw_counts->aggr = *count; - } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; - } + tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; count->val = count->val - tmp.val; count->ena = count->ena - tmp.ena; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1f2040f36d4e..7bc8559dce6a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1653,12 +1653,6 @@ static void python_process_stat(struct perf_stat_config *config, struct perf_cpu_map *cpus = counter->core.cpus; int cpu, thread; - if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp, - &counter->counts->aggr); - return; - } - for (thread = 0; thread < threads->nr; thread++) { for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { process_stat(counter, perf_cpu_map__cpu(cpus, cpu), diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 374149628507..99874254809d 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -387,6 +387,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; + struct perf_stat_evsel *ps = evsel->stats; static struct perf_counts_values zero; bool skip = false; @@ -398,6 +399,44 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, if (skip) count = &zero; + if (!evsel->snapshot) + evsel__compute_deltas(evsel, cpu_map_idx, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + + if (ps->aggr) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); + struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu); + struct perf_stat_aggr *ps_aggr; + int i; + + for (i = 0; i < ps->nr_aggr; i++) { + if (!aggr_cpu_id__equal(&aggr_id, &config->aggr_map->map[i])) + continue; + + ps_aggr = &ps->aggr[i]; + ps_aggr->nr++; + + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (count->ena == 0 || count->run == 0 || + evsel->counts->scaled == -1) { + ps_aggr->counts.val = 0; + ps_aggr->counts.ena = 0; + ps_aggr->counts.run = 0; + ps_aggr->failed = true; + } + + if (!ps_aggr->failed) { + ps_aggr->counts.val += count->val; + ps_aggr->counts.ena += count->ena; + ps_aggr->counts.run += count->run; + } + break; + } + } + switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: @@ -405,9 +444,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_SOCKET: case AGGR_NODE: case AGGR_NONE: - if (!evsel->snapshot) - evsel__compute_deltas(evsel, cpu_map_idx, thread, count); - perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, cpu_map_idx, &rt_stat); @@ -469,10 +505,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (config->aggr_mode != AGGR_GLOBAL) return 0; - if (!counter->snapshot) - evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); - update_stats(&ps->res_stats, *count); if (verbose > 0) { -- cgit v1.2.3 From 049aba09e2156dd2ff1a61bf1b8738b0fc864c9d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:17 -0700 Subject: perf stat: Factor out evsel__count_has_error() It's possible to have 0 enabled/running time for some per-task or per-cgroup events since it's not scheduled on any CPU. Treating the whole event as failed would not work in this case. Thinking again, the code only existed when any CPU-level aggregation is enabled (like per-socket, per-core, ...). To make it clearer, factor out the condition check into the new evsel__count_has_error() function and add some comments. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 99874254809d..dc075d5a0f72 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -381,6 +381,25 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, return ret; } +static bool evsel__count_has_error(struct evsel *evsel, + struct perf_counts_values *count, + struct perf_stat_config *config) +{ + /* the evsel was failed already */ + if (evsel->err || evsel->counts->scaled == -1) + return true; + + /* this is meaningful for CPU aggregation modes only */ + if (config->aggr_mode == AGGR_GLOBAL) + return false; + + /* it's considered ok when it actually ran */ + if (count->ena != 0 && count->run != 0) + return false; + + return true; +} + static int process_counter_values(struct perf_stat_config *config, struct evsel *evsel, int cpu_map_idx, int thread, @@ -420,8 +439,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, * When any result is bad, make them all to give * consistent output in interval mode. */ - if (count->ena == 0 || count->run == 0 || - evsel->counts->scaled == -1) { + if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) { ps_aggr->counts.val = 0; ps_aggr->counts.ena = 0; ps_aggr->counts.run = 0; -- cgit v1.2.3 From 050059e1b1affc080ede9fe691768e2383eb6367 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:18 -0700 Subject: perf stat: Aggregate per-thread stats using evsel->stats->aggr Per-thread aggregation doesn't use the CPU numbers but the logic should be the same. Initialize cpu_aggr_map separately for AGGR_THREAD and use thread map idx to aggregate counter values. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 31 +++++++++++++++++++++++++++++++ tools/perf/util/stat.c | 24 ++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index abede56d79b6..6777fef0d56c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1465,6 +1465,21 @@ static int perf_stat_init_aggr_mode(void) stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } + if (stat_config.aggr_mode == AGGR_THREAD) { + nr = perf_thread_map__nr(evsel_list->core.threads); + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } + /* * The evsel_list->cpus is the base we operate on, * taking the highest cpu number to be the size of @@ -1674,6 +1689,22 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); bool needs_sort = stat_config.aggr_mode != AGGR_NONE; + if (stat_config.aggr_mode == AGGR_THREAD) { + int nr = perf_thread_map__nr(evsel_list->core.threads); + + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } + if (!get_id) return 0; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index dc075d5a0f72..5b04c9d16156 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -422,6 +422,24 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, evsel__compute_deltas(evsel, cpu_map_idx, thread, count); perf_counts_values__scale(count, config->scale, NULL); + if (config->aggr_mode == AGGR_THREAD) { + struct perf_counts_values *aggr_counts = &ps->aggr[thread].counts; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (count->val == 0 && config->system_wide) + return 0; + + ps->aggr[thread].nr++; + + aggr_counts->val += count->val; + aggr_counts->ena += count->ena; + aggr_counts->run += count->run; + goto update; + } + if (ps->aggr) { struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu); @@ -436,8 +454,9 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, ps_aggr->nr++; /* - * When any result is bad, make them all to give - * consistent output in interval mode. + * When any result is bad, make them all to give consistent output + * in interval mode. But per-task counters can have 0 enabled time + * when some tasks are idle. */ if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) { ps_aggr->counts.val = 0; @@ -455,6 +474,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } } +update: switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: -- cgit v1.2.3 From ae7e6492ee54e18cc3d6ed4bb5ca857726a7e9c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:19 -0700 Subject: perf stat: Allocate aggr counts for recorded data In the process_stat_config_event() it sets the aggr_mode that means the earlier evlist__alloc_stats() cannot allocate the aggr counts due to the missing aggr_mode. Do it after setting the aggr_map using evlist__alloc_aggr_stats(). Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 20 +++++++++++++++----- tools/perf/util/stat.c | 39 +++++++++++++++++++++++++++++++-------- tools/perf/util/stat.h | 2 ++ 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6777fef0d56c..2a6a5d0c5563 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1342,7 +1342,11 @@ static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __m static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + struct aggr_cpu_id id; + + /* per-process mode - should use global aggr mode */ + if (cpu.cpu == -1) + return get_id(config, cpu); if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); @@ -2125,17 +2129,23 @@ int process_stat_config_event(struct perf_session *session, if (perf_cpu_map__empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); - return 0; - } - - if (st->aggr_mode != AGGR_UNSET) + } else if (st->aggr_mode != AGGR_UNSET) { stat_config.aggr_mode = st->aggr_mode; + } if (perf_stat.data.is_pipe) perf_stat_init_aggr_mode(); else perf_stat_init_aggr_mode_file(st); + if (stat_config.aggr_map) { + int nr_aggr = stat_config.aggr_map->nr; + + if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) { + pr_err("cannot allocate aggr counts\n"); + return -1; + } + } return 0; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5b04c9d16156..1b9048115a18 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -141,6 +141,31 @@ static void evsel__reset_stat_priv(struct evsel *evsel) memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } +static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr) +{ + struct perf_stat_evsel *ps = evsel->stats; + + if (ps == NULL) + return 0; + + ps->nr_aggr = nr_aggr; + ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); + if (ps->aggr == NULL) + return -ENOMEM; + + return 0; +} + +int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) + return -1; + } + return 0; +} static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) { @@ -150,16 +175,14 @@ static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) if (ps == NULL) return -ENOMEM; - if (nr_aggr) { - ps->nr_aggr = nr_aggr; - ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); - if (ps->aggr == NULL) { - free(ps); - return -ENOMEM; - } + evsel->stats = ps; + + if (nr_aggr && evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) { + evsel->stats = NULL; + free(ps); + return -ENOMEM; } - evsel->stats = ps; perf_stat_evsel_id_init(evsel); evsel__reset_stat_priv(evsel); return 0; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0980875b9be1..4c00f814bd79 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -275,6 +275,8 @@ void evlist__reset_prev_raw_counts(struct evlist *evlist); void evlist__copy_prev_raw_counts(struct evlist *evlist); void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); +int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr); + int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); struct perf_tool; -- cgit v1.2.3 From 8f97963e09761c239522649921d7b1c57ff2debb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:20 -0700 Subject: perf stat: Reset aggr counts for each interval The evsel->stats->aggr->count should be reset for interval processing since we want to use the values directly for display. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-14-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 +++ tools/perf/util/stat.c | 20 +++++++++++++++++--- tools/perf/util/stat.h | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2a6a5d0c5563..bff28a199dfd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -492,6 +492,8 @@ static void process_interval(void) diff_timespec(&rs, &ts, &ref_time); perf_stat__reset_shadow_per_stat(&rt_stat); + evlist__reset_aggr_stats(evsel_list); + read_counters(&rs); if (STAT_RECORD) { @@ -965,6 +967,7 @@ try_again_reset: evlist__copy_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list); + evlist__reset_aggr_stats(evsel_list); perf_stat__reset_shadow_per_stat(&rt_stat); } else { update_stats(&walltime_nsecs_stats, t1 - t0); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1b9048115a18..a4066f0d3637 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -130,17 +130,23 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) } } -static void evsel__reset_stat_priv(struct evsel *evsel) +static void evsel__reset_aggr_stats(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; struct perf_stat_aggr *aggr = ps->aggr; - init_stats(&ps->res_stats); - if (aggr) memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); } +static void evsel__reset_stat_priv(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + + init_stats(&ps->res_stats); + evsel__reset_aggr_stats(evsel); +} + static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr) { struct perf_stat_evsel *ps = evsel->stats; @@ -276,6 +282,14 @@ void evlist__reset_stats(struct evlist *evlist) } } +void evlist__reset_aggr_stats(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__reset_aggr_stats(evsel); +} + void evlist__reset_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 4c00f814bd79..809f9f0aff0c 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -276,6 +276,7 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist); void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr); +void evlist__reset_aggr_stats(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); -- cgit v1.2.3 From 8962cbec5a0672b3966e9c48ac22e207d56e13ca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:21 -0700 Subject: perf stat: Split process_counters() to share it with process_stat_round_event() It'd do more processing with aggregation. Let's split the function so that it can be shared with by process_stat_round_event() too. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-15-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index bff28a199dfd..838d29590bed 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -465,15 +465,19 @@ static int read_bpf_map_counters(void) return 0; } -static void read_counters(struct timespec *rs) +static int read_counters(struct timespec *rs) { - struct evsel *counter; - if (!stat_config.stop_read_counter) { if (read_bpf_map_counters() || read_affinity_counters(rs)) - return; + return -1; } + return 0; +} + +static void process_counters(void) +{ + struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { if (counter->err) @@ -494,7 +498,8 @@ static void process_interval(void) perf_stat__reset_shadow_per_stat(&rt_stat); evlist__reset_aggr_stats(evsel_list); - read_counters(&rs); + if (read_counters(&rs) == 0) + process_counters(); if (STAT_RECORD) { if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) @@ -980,7 +985,8 @@ try_again_reset: * avoid arbitrary skew, we must read all counters before closing any * group leaders. */ - read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); + if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0) + process_counters(); /* * We need to keep evsel_list alive, because it's processed @@ -2099,13 +2105,11 @@ static int process_stat_round_event(struct perf_session *session, union perf_event *event) { struct perf_record_stat_round *stat_round = &event->stat_round; - struct evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; - evlist__for_each_entry(evsel_list, counter) - perf_stat_process_counter(&stat_config, counter); + process_counters(); if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) update_stats(&walltime_nsecs_stats, stat_round->time); -- cgit v1.2.3 From 942c5593393d9418bf521e77fa1eab47599efc4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:22 -0700 Subject: perf stat: Add perf_stat_merge_counters() The perf_stat_merge_counters() is to aggregate the same events in different PMUs like in case of uncore or hybrid. The same logic is in the stat-display routines but I think it should be handled when it processes the event counters. As it works on the aggr_counters, it doesn't change the output yet. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-16-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 + tools/perf/util/stat.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 2 + 3 files changed, 100 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 838d29590bed..371d6e896942 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -486,6 +486,8 @@ static void process_counters(void) pr_warning("failed to process counter %s\n", counter->name); counter->err = 0; } + + perf_stat_merge_counters(&stat_config, evsel_list); } static void process_interval(void) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index a4066f0d3637..aff1e7390585 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -595,6 +595,102 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; } +static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) +{ + struct perf_stat_evsel *ps_a = evsel->stats; + struct perf_stat_evsel *ps_b = alias->stats; + int i; + + if (ps_a->aggr == NULL && ps_b->aggr == NULL) + return 0; + + if (ps_a->nr_aggr != ps_b->nr_aggr) { + pr_err("Unmatched aggregation mode between aliases\n"); + return -1; + } + + for (i = 0; i < ps_a->nr_aggr; i++) { + struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; + struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; + + /* NB: don't increase aggr.nr for aliases */ + + aggr_counts_a->val += aggr_counts_b->val; + aggr_counts_a->ena += aggr_counts_b->ena; + aggr_counts_a->run += aggr_counts_b->run; + } + + return 0; +} +/* events should have the same name, scale, unit, cgroup but on different PMUs */ +static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) +{ + if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) + return false; + + if (evsel_a->scale != evsel_b->scale) + return false; + + if (evsel_a->cgrp != evsel_b->cgrp) + return false; + + if (strcmp(evsel_a->unit, evsel_b->unit)) + return false; + + if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) + return false; + + return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); +} + +static void evsel__merge_aliases(struct evsel *evsel) +{ + struct evlist *evlist = evsel->evlist; + struct evsel *alias; + + alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); + list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { + /* Merge the same events on different PMUs. */ + if (evsel__is_alias(evsel, alias)) { + evsel__merge_aggr_counters(evsel, alias); + alias->merged_stat = true; + } + } +} + +static bool evsel__should_merge_hybrid(struct evsel *evsel, struct perf_stat_config *config) +{ + struct perf_pmu *pmu; + + if (!config->hybrid_merge) + return false; + + pmu = evsel__find_pmu(evsel); + return pmu && pmu->is_hybrid; +} + +static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) +{ + /* this evsel is already merged */ + if (evsel->merged_stat) + return; + + if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + evsel__merge_aliases(evsel); +} + +/* merge the same uncore and hybrid events if requested */ +void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->no_merge) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__merge_stats(evsel, config); +} + int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 809f9f0aff0c..728bbc823b0d 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -280,6 +280,8 @@ void evlist__reset_aggr_stats(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); +void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); + struct perf_tool; union perf_event; struct perf_session; -- cgit v1.2.3 From 1d6d2bea5b97359ff09a8d793674aab3e5f79023 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:23 -0700 Subject: perf stat: Add perf_stat_process_percore() The perf_stat_process_percore() is to aggregate counts for an event per-core even if the aggr_mode is AGGR_NONE. This is enabled when user requested it on the command line. To handle that, it keeps the per-cpu counts at first. And then it aggregates the counts that have the same core id in the aggr->counts and updates the values for each cpu back. Later, per-core events will skip one of the CPUs unless percore-show-thread option is given. In that case, it can simply print all cpu stats with the updated (per-core) values. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-17-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/stat.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 3 ++ 3 files changed, 75 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 371d6e896942..d6a006e41da0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -488,6 +488,7 @@ static void process_counters(void) } perf_stat_merge_counters(&stat_config, evsel_list); + perf_stat_process_percore(&stat_config, evsel_list); } static void process_interval(void) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index aff1e7390585..26c48ef7ca92 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -691,6 +691,77 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev evsel__merge_stats(evsel, config); } +static void evsel__update_percore_stats(struct evsel *evsel, struct aggr_cpu_id *core_id) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct perf_counts_values counts = { 0, }; + struct aggr_cpu_id id; + struct perf_cpu cpu; + int idx; + + /* collect per-core counts */ + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + id = aggr_cpu_id__core(cpu, NULL); + if (!aggr_cpu_id__equal(core_id, &id)) + continue; + + counts.val += aggr->counts.val; + counts.ena += aggr->counts.ena; + counts.run += aggr->counts.run; + } + + /* update aggregated per-core counts for each CPU */ + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + id = aggr_cpu_id__core(cpu, NULL); + if (!aggr_cpu_id__equal(core_id, &id)) + continue; + + aggr->counts.val = counts.val; + aggr->counts.ena = counts.ena; + aggr->counts.run = counts.run; + + aggr->used = true; + } +} + +/* we have an aggr_map for cpu, but want to aggregate the counters per-core */ +static void evsel__process_percore(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct aggr_cpu_id core_id; + struct perf_cpu cpu; + int idx; + + if (!evsel->percore) + return; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + if (aggr->used) + continue; + + core_id = aggr_cpu_id__core(cpu, NULL); + evsel__update_percore_stats(evsel, &core_id); + } +} + +/* process cpu stats on per-core events */ +void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->aggr_mode != AGGR_NONE) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__process_percore(evsel); +} + int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 728bbc823b0d..d23f8743e442 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -51,6 +51,8 @@ struct perf_stat_aggr { int nr; /* whether any entry has failed to read/process event */ bool failed; + /* to mark this data is processed already */ + bool used; }; /* per-evsel event stats */ @@ -281,6 +283,7 @@ void evlist__reset_aggr_stats(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); +void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist); struct perf_tool; union perf_event; -- cgit v1.2.3 From 88f1d3512c947ad8e396e50acfce5ee55133df0a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:24 -0700 Subject: perf stat: Add perf_stat_process_shadow_stats() This function updates the shadow stats using the aggregated counts uniformly since it uses the aggr_counts for the every aggr mode. It'd have duplicate shadow stats for each items for now since the display routines will update them once again. But that'd be fine as it shows the average values and it'd be gone eventually. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-18-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/stat.c | 50 ++++++++++++++++++++++++----------------------- tools/perf/util/stat.h | 1 + 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d6a006e41da0..d7c52cef70a3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -489,6 +489,7 @@ static void process_counters(void) perf_stat_merge_counters(&stat_config, evsel_list); perf_stat_process_percore(&stat_config, evsel_list); + perf_stat_process_shadow_stats(&stat_config, evsel_list); } static void process_interval(void) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 26c48ef7ca92..c0955a0427ab 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -474,7 +474,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, aggr_counts->val += count->val; aggr_counts->ena += count->ena; aggr_counts->run += count->run; - goto update; + return 0; } if (ps->aggr) { @@ -511,32 +511,10 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } } -update: - switch (config->aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_DIE: - case AGGR_SOCKET: - case AGGR_NODE: - case AGGR_NONE: - if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { - perf_stat__update_shadow_stats(evsel, count->val, - cpu_map_idx, &rt_stat); - } - - if (config->aggr_mode == AGGR_THREAD) { - perf_stat__update_shadow_stats(evsel, count->val, - thread, &rt_stat); - } - break; - case AGGR_GLOBAL: + if (config->aggr_mode == AGGR_GLOBAL) { aggr->val += count->val; aggr->ena += count->ena; aggr->run += count->run; - case AGGR_UNSET: - case AGGR_MAX: - default: - break; } return 0; @@ -762,6 +740,30 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e evsel__process_percore(evsel); } +static void evsel__update_shadow_stats(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + int i; + + if (ps->aggr == NULL) + return; + + for (i = 0; i < ps->nr_aggr; i++) { + struct perf_counts_values *aggr_counts = &ps->aggr[i].counts; + + perf_stat__update_shadow_stats(evsel, aggr_counts->val, i, &rt_stat); + } +} + +void perf_stat_process_shadow_stats(struct perf_stat_config *config __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__update_shadow_stats(evsel); +} + int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index d23f8743e442..3d413ba8c68a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -284,6 +284,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist); +void perf_stat_process_shadow_stats(struct perf_stat_config *config, struct evlist *evlist); struct perf_tool; union perf_event; -- cgit v1.2.3 From 91f85f98da7ab8c32105f42dd03884c01ec4498f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:25 -0700 Subject: perf stat: Display event stats using aggr counts Now aggr counts are ready for use. Convert the display routines to use the aggr counts and update the shadow stat with them. It doesn't need to aggregate counts or collect aliases anymore during the display. Get rid of now unused struct perf_aggr_thread_value. Note that there's a difference in the display order among the aggr mode. For per-core/die/socket/node aggregation, it shows relevant events in the same unit together, whereas global/thread/no aggregation it shows the same events for different units together. So it still uses separate codes to display them due to the ordering. One more thing to note is that it breaks per-core event display for now. The next patch will fix it to have identical output as of now. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-19-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 421 +++++------------------------------------ tools/perf/util/stat.c | 5 - tools/perf/util/stat.h | 9 - 3 files changed, 49 insertions(+), 386 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4113aa86772f..bfae2784609c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -442,31 +442,6 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_map_idx(struct perf_stat_config *config, - struct evsel *evsel, const struct aggr_cpu_id *id) -{ - struct perf_cpu_map *cpus = evsel__cpus(evsel); - struct perf_cpu cpu; - int idx; - - if (config->aggr_mode == AGGR_NONE) - return perf_cpu_map__idx(cpus, id->cpu); - - if (config->aggr_mode == AGGR_THREAD) - return id->thread_idx; - - if (!config->aggr_get_id) - return 0; - - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); - - if (aggr_cpu_id__equal(&cpu_id, id)) - return idx; - } - return 0; -} - static void abs_printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) { @@ -537,7 +512,7 @@ static bool is_mixed_hw_group(struct evsel *counter) static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, - struct runtime_stat *st) + struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; struct outstate os = { @@ -648,8 +623,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int print_running(config, run, ena); } - perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_map_idx(config, counter, &id), + perf_stat__print_shadow_stats(config, counter, uval, map_idx, &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only && !config->json_output) { print_noise(config, counter, noise); @@ -657,34 +631,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } } -static void aggr_update_shadow(struct perf_stat_config *config, - struct evlist *evlist) -{ - int idx, s; - struct perf_cpu cpu; - struct aggr_cpu_id s2, id; - u64 val; - struct evsel *counter; - struct perf_cpu_map *cpus; - - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - evlist__for_each_entry(evlist, counter) { - cpus = evsel__cpus(counter); - val = 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &id)) - continue; - val += perf_counts(counter->counts, idx, 0)->val; - } - perf_stat__update_shadow_stats(counter, val, - first_shadow_map_idx(config, counter, &id), - &rt_stat); - } - } -} - static void uniquify_event_name(struct evsel *counter) { char *new_name; @@ -721,137 +667,51 @@ static void uniquify_event_name(struct evsel *counter) counter->uniquified_name = true; } -static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter, - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, - bool first), - void *data) +static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) { - struct evlist *evlist = counter->evlist; - struct evsel *alias; - - alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); - list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - /* Merge events with the same name, etc. but on different PMUs. */ - if (!strcmp(evsel__name(alias), evsel__name(counter)) && - alias->scale == counter->scale && - alias->cgrp == counter->cgrp && - !strcmp(alias->unit, counter->unit) && - evsel__is_clock(alias) == evsel__is_clock(counter) && - strcmp(alias->pmu_name, counter->pmu_name)) { - alias->merged_stat = true; - cb(config, alias, data, false); - } - } + return evsel__is_hybrid(evsel) && !config->hybrid_merge; } -static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, - bool check) +static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - if (evsel__is_hybrid(counter)) { - if (check) - return config->hybrid_merge; - else - return !config->hybrid_merge; - } - - return false; -} - -static bool collect_data(struct perf_stat_config *config, struct evsel *counter, - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, - bool first), - void *data) -{ - if (counter->merged_stat) - return false; - cb(config, counter, data, true); - if (config->no_merge || hybrid_merge(counter, config, false)) + if (config->no_merge || hybrid_uniquify(counter, config)) uniquify_event_name(counter); - else if (counter->auto_merge_stats || hybrid_merge(counter, config, true)) - collect_all_aliases(config, counter, cb, data); - return true; -} - -struct aggr_data { - u64 ena, run, val; - struct aggr_cpu_id id; - int nr; - int cpu_map_idx; -}; - -static void aggr_cb(struct perf_stat_config *config, - struct evsel *counter, void *data, bool first) -{ - struct aggr_data *ad = data; - int idx; - struct perf_cpu cpu; - struct perf_cpu_map *cpus; - struct aggr_cpu_id s2; - - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct perf_counts_values *counts; - - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &ad->id)) - continue; - if (first) - ad->nr++; - counts = perf_counts(counter->counts, idx, 0); - /* - * When any result is bad, make them all to give - * consistent output in interval mode. - */ - if (counts->ena == 0 || counts->run == 0 || - counter->counts->scaled == -1) { - ad->ena = 0; - ad->run = 0; - break; - } - ad->val += counts->val; - ad->ena += counts->ena; - ad->run += counts->run; - } } static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first, struct perf_cpu cpu) + bool *first) { - struct aggr_data ad; FILE *output = config->output; u64 ena, run, val; - int nr; - struct aggr_cpu_id id; double uval; + struct perf_stat_evsel *ps = counter->stats; + struct perf_stat_aggr *aggr = &ps->aggr[s]; + struct aggr_cpu_id id = config->aggr_map->map[s]; + double avg = aggr->counts.val; - ad.id = id = config->aggr_map->map[s]; - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(config, counter, aggr_cb, &ad)) + if (counter->supported && aggr->nr == 0) return; - if (perf_pmu__has_hybrid() && ad.ena == 0) - return; + uniquify_counter(config, counter); + + val = aggr->counts.val; + ena = aggr->counts.ena; + run = aggr->counts.run; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; if (*first && metric_only) { *first = false; - aggr_printout(config, counter, id, nr); + aggr_printout(config, counter, id, aggr->nr); } if (prefix && !metric_only) fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu.cpu != -1) - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - printout(config, id, nr, counter, uval, - prefix, run, ena, 1.0, &rt_stat); + printout(config, id, aggr->nr, counter, uval, + prefix, run, ena, avg, &rt_stat, s); + if (!metric_only) fputc('\n', output); } @@ -869,8 +729,6 @@ static void print_aggr(struct perf_stat_config *config, if (!config->aggr_map || !config->aggr_get_id) return; - aggr_update_shadow(config, evlist); - /* * With metric_only everything is on a single line. * Without each counter has its own line. @@ -881,188 +739,36 @@ static void print_aggr(struct perf_stat_config *config, first = true; evlist__for_each_entry(evlist, counter) { + if (counter->merged_stat) + continue; + print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + prefix, metric_only, + &first); } if (metric_only) fputc('\n', output); } } -static int cmp_val(const void *a, const void *b) -{ - return ((struct perf_aggr_thread_value *)b)->val - - ((struct perf_aggr_thread_value *)a)->val; -} - -static struct perf_aggr_thread_value *sort_aggr_thread( - struct evsel *counter, - int *ret, - struct target *_target) -{ - int nthreads = perf_thread_map__nr(counter->core.threads); - int i = 0; - double uval; - struct perf_aggr_thread_value *buf; - - buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); - if (!buf) - return NULL; - - for (int thread = 0; thread < nthreads; thread++) { - int idx; - u64 ena = 0, run = 0, val = 0; - - perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) { - struct perf_counts_values *counts = - perf_counts(counter->counts, idx, thread); - - val += counts->val; - ena += counts->ena; - run += counts->run; - } - - uval = val * counter->scale; - - /* - * Skip value 0 when enabling --per-thread globally, - * otherwise too many 0 output. - */ - if (uval == 0.0 && target__has_per_thread(_target)) - continue; - - buf[i].counter = counter; - buf[i].id = aggr_cpu_id__empty(); - buf[i].id.thread_idx = thread; - buf[i].uval = uval; - buf[i].val = val; - buf[i].run = run; - buf[i].ena = ena; - i++; - } - - qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); - - if (ret) - *ret = i; - - return buf; -} - -static void print_aggr_thread(struct perf_stat_config *config, - struct target *_target, - struct evsel *counter, char *prefix) -{ - FILE *output = config->output; - int thread, sorted_threads; - struct aggr_cpu_id id; - struct perf_aggr_thread_value *buf; - - buf = sort_aggr_thread(counter, &sorted_threads, _target); - if (!buf) { - perror("cannot sort aggr thread"); - return; - } - - for (thread = 0; thread < sorted_threads; thread++) { - if (prefix) - fprintf(output, "%s", prefix); - - id = buf[thread].id; - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &rt_stat); - fputc('\n', output); - } - - free(buf); -} - -struct caggr_data { - double avg, avg_enabled, avg_running; -}; - -static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, - struct evsel *counter, void *data, - bool first __maybe_unused) -{ - struct caggr_data *cd = data; - struct perf_counts_values *aggr = &counter->counts->aggr; - - cd->avg += aggr->val; - cd->avg_enabled += aggr->ena; - cd->avg_running += aggr->run; -} - -/* - * Print out the results of a single counter: - * aggregated counts in system-wide mode - */ -static void print_counter_aggr(struct perf_stat_config *config, - struct evsel *counter, char *prefix) -{ - bool metric_only = config->metric_only; - FILE *output = config->output; - double uval; - struct caggr_data cd = { .avg = 0.0 }; - - if (!collect_data(config, counter, counter_aggr_cb, &cd)) - return; - - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = cd.avg * counter->scale; - printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, - cd.avg_enabled, cd.avg, &rt_stat); - if (!metric_only) - fprintf(output, "\n"); -} - -static void counter_cb(struct perf_stat_config *config __maybe_unused, - struct evsel *counter, void *data, - bool first __maybe_unused) -{ - struct aggr_data *ad = data; - - ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; -} - -/* - * Print out the results of a single counter: - * does not use aggregated count in system-wide - */ static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - FILE *output = config->output; - u64 ena, run, val; - double uval; - int idx; - struct perf_cpu cpu; - struct aggr_cpu_id id; - - perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { - struct aggr_data ad = { .cpu_map_idx = idx }; - - if (!collect_data(config, counter, counter_cb, &ad)) - return; - val = ad.val; - ena = ad.ena; - run = ad.run; + bool metric_only = config->metric_only; + bool first = false; + int s; - if (prefix) - fprintf(output, "%s", prefix); + /* AGGR_THREAD doesn't have config->aggr_get_id */ + if (!config->aggr_map) + return; - uval = val * counter->scale; - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + if (counter->merged_stat) + return; - fputc('\n', output); + for (s = 0; s < config->aggr_map->nr; s++) { + print_counter_aggrdata(config, counter, s, + prefix, metric_only, + &first); } } @@ -1081,6 +787,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, u64 ena, run, val; double uval; struct aggr_cpu_id id; + struct perf_stat_evsel *ps = counter->stats; int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); if (counter_idx < 0) @@ -1093,13 +800,13 @@ static void print_no_aggr_metric(struct perf_stat_config *config, aggr_printout(config, counter, id, 0); first = false; } - val = perf_counts(counter->counts, counter_idx, 0)->val; - ena = perf_counts(counter->counts, counter_idx, 0)->ena; - run = perf_counts(counter->counts, counter_idx, 0)->run; + val = ps->aggr[counter_idx].counts.val; + ena = ps->aggr[counter_idx].counts.ena; + run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + run, ena, 1.0, &rt_stat, counter_idx); } if (!first) fputc('\n', config->output); @@ -1135,8 +842,8 @@ static void print_metric_headers(struct perf_stat_config *config, }; bool first = true; - if (config->json_output && !config->interval) - fprintf(config->output, "{"); + if (config->json_output && !config->interval) + fprintf(config->output, "{"); if (prefix && !config->json_output) fprintf(config->output, "%s", prefix); @@ -1379,31 +1086,6 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. Try reorganizing the group.\n"); } -static void print_percore_thread(struct perf_stat_config *config, - struct evsel *counter, char *prefix) -{ - int s; - struct aggr_cpu_id s2, id; - struct perf_cpu_map *cpus; - bool first = true; - int idx; - struct perf_cpu cpu; - - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - if (aggr_cpu_id__equal(&s2, &id)) - break; - } - - print_counter_aggrdata(config, counter, s, - prefix, false, - &first, cpu); - } -} - static void print_percore(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1416,15 +1098,14 @@ static void print_percore(struct perf_stat_config *config, return; if (config->percore_show_thread) - return print_percore_thread(config, counter, prefix); + return print_counter(config, counter, prefix); for (s = 0; s < config->aggr_map->nr; s++) { if (prefix && metric_only) fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + prefix, metric_only, &first); } if (metric_only) @@ -1469,17 +1150,13 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf print_aggr(config, evlist, prefix); break; case AGGR_THREAD: - evlist__for_each_entry(evlist, counter) { - print_aggr_thread(config, _target, counter, prefix); - } - break; case AGGR_GLOBAL: if (config->iostat_run) iostat_print_counters(evlist, config, ts, prefix = buf, - print_counter_aggr); + print_counter); else { evlist__for_each_entry(evlist, counter) { - print_counter_aggr(config, counter, prefix); + print_counter(config, counter, prefix); } if (metric_only) fputc('\n', config->output); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c0955a0427ab..0316557adce9 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -565,11 +565,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, evsel__name(counter), count[0], count[1], count[2]); } - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); - return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 3d413ba8c68a..382a1ab92ce1 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -224,15 +224,6 @@ static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rus struct evsel; struct evlist; -struct perf_aggr_thread_value { - struct evsel *counter; - struct aggr_cpu_id id; - double uval; - u64 val; - u64 run; - u64 ena; -}; - bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); #define perf_stat_evsel__is(evsel, id) \ -- cgit v1.2.3 From cec94d69636a023302d484ec036158b792001d3c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:26 -0700 Subject: perf stat: Display percore events properly The recent change in the perf stat broke the percore event display. Note that the aggr counts are already processed so that the every sibling thread in the same core will get the per-core counter values. Check percore evsels and skip the sibling threads in the display. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-20-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 16 ---------------- tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d7c52cef70a3..9d35a3338976 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1404,18 +1404,6 @@ static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *con return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); } -static bool term_percore_set(void) -{ - struct evsel *counter; - - evlist__for_each_entry(evsel_list, counter) { - if (counter->percore) - return true; - } - - return false; -} - static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) { switch (aggr_mode) { @@ -1428,8 +1416,6 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) case AGGR_NODE: return aggr_cpu_id__node; case AGGR_NONE: - if (term_percore_set()) - return aggr_cpu_id__core; return aggr_cpu_id__cpu; case AGGR_GLOBAL: return aggr_cpu_id__global; @@ -1453,8 +1439,6 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: - if (term_percore_set()) - return perf_stat__get_core_cached; return perf_stat__get_cpu_cached; case AGGR_GLOBAL: return perf_stat__get_global_cached; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bfae2784609c..657434cd29ee 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1091,7 +1091,8 @@ static void print_percore(struct perf_stat_config *config, { bool metric_only = config->metric_only; FILE *output = config->output; - int s; + struct cpu_aggr_map *core_map; + int s, c, i; bool first = true; if (!config->aggr_map || !config->aggr_get_id) @@ -1100,13 +1101,35 @@ static void print_percore(struct perf_stat_config *config, if (config->percore_show_thread) return print_counter(config, counter, prefix); - for (s = 0; s < config->aggr_map->nr; s++) { + core_map = cpu_aggr_map__empty_new(config->aggr_map->nr); + if (core_map == NULL) { + fprintf(output, "Cannot allocate per-core aggr map for display\n"); + return; + } + + for (s = 0, c = 0; s < config->aggr_map->nr; s++) { + struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu; + struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL); + bool found = false; + + for (i = 0; i < c; i++) { + if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) { + found = true; + break; + } + } + if (found) + continue; + if (prefix && metric_only) fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, prefix, metric_only, &first); + + core_map->map[c++] = core_id; } + free(core_map); if (metric_only) fputc('\n', output); -- cgit v1.2.3 From 8b76a3188b85724f345ea5d03ff206cc9bbe6d72 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Oct 2022 19:02:27 -0700 Subject: perf stat: Remove unused perf_counts.aggr field The aggr field in the struct perf_counts is to keep the aggregated value in the AGGR_GLOBAL for the old code. But it's not used anymore. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221018020227.85905-21-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/counts.c | 1 - tools/perf/util/counts.h | 1 - tools/perf/util/stat.c | 39 ++++++--------------------------------- 3 files changed, 6 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 7a447d918458..11cd85b278a6 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts) { xyarray__reset(counts->loaded); xyarray__reset(counts->values); - memset(&counts->aggr, 0, sizeof(struct perf_counts_values)); } void evsel__reset_counts(struct evsel *evsel) diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 5de275194f2b..42760242e0df 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -11,7 +11,6 @@ struct evsel; struct perf_counts { s8 scaled; - struct perf_counts_values aggr; struct xyarray *values; struct xyarray *loaded; }; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0316557adce9..3a432a949d46 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -308,8 +308,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel) *perf_counts(evsel->prev_raw_counts, idx, thread); } } - - evsel->counts->aggr = evsel->prev_raw_counts->aggr; } void evlist__copy_prev_raw_counts(struct evlist *evlist) @@ -320,26 +318,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } -void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) -{ - struct evsel *evsel; - - /* - * To collect the overall statistics for interval mode, - * we copy the counts from evsel->prev_raw_counts to - * evsel->counts. The perf_stat_process_counter creates - * aggr values from per cpu values, but the per cpu values - * are 0 for AGGR_GLOBAL. So we use a trick that saves the - * previous aggr value to the first member of perf_counts, - * then aggr calculation in process_counter_values can work - * correctly. - */ - evlist__for_each_entry(evlist, evsel) { - *perf_counts(evsel->prev_raw_counts, 0, 0) = - evsel->prev_raw_counts->aggr; - } -} - static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) { uint64_t *key = (uint64_t *) __key; @@ -442,7 +420,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { - struct perf_counts_values *aggr = &evsel->counts->aggr; struct perf_stat_evsel *ps = evsel->stats; static struct perf_counts_values zero; bool skip = false; @@ -511,12 +488,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, } } - if (config->aggr_mode == AGGR_GLOBAL) { - aggr->val += count->val; - aggr->ena += count->ena; - aggr->run += count->run; - } - return 0; } @@ -541,13 +512,10 @@ static int process_counter_maps(struct perf_stat_config *config, int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter) { - struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->stats; - u64 *count = counter->counts->aggr.values; + u64 *count; int ret; - aggr->val = aggr->ena = aggr->run = 0; - if (counter->per_pkg) evsel__zero_per_pkg(counter); @@ -558,6 +526,11 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (config->aggr_mode != AGGR_GLOBAL) return 0; + /* + * GLOBAL aggregation mode only has a single aggr counts, + * so we can use ps->aggr[0] as the actual output. + */ + count = ps->aggr[0].counts.values; update_stats(&ps->res_stats, *count); if (verbose > 0) { -- cgit v1.2.3 From a87edbec35725ced484f6b8275f1246ed7194329 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 22 Oct 2022 17:27:34 +0800 Subject: perf daemon: Complete list of supported subcommand in help message perf daemon supports start, signal, stop and ping subcommands, complete it Before: # perf daemon -h Usage: perf daemon start [] or: perf daemon [] -v, --verbose be more verbose -x, --field-separator[=] print counts with custom separator --base base directory --config config file path After: # perf daemon -h Usage: perf daemon {start|signal|stop|ping} [] or: perf daemon [] -v, --verbose be more verbose -x, --field-separator[=] print counts with custom separator --base base directory --config config file path Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221022092735.114967-3-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 6cb3f6cc36d0..3ce0c960ccc9 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -100,7 +100,7 @@ static struct daemon __daemon = { }; static const char * const daemon_usage[] = { - "perf daemon start []", + "perf daemon {start|signal|stop|ping} []", "perf daemon []", NULL }; -- cgit v1.2.3 From 0cef66a98420f1210be697ed47bd7417ed428847 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 22 Oct 2022 17:27:35 +0800 Subject: perf config: Add missing newline on pr_warning() call in home_perfconfig() Add missing newline on pr_warning() call in home_perfconfig(). Before: # perf record File /home/yangjihong/.perfconfig not owned by current user or root, ignoring it.Couldn't synthesize bpf events. After: # perf record File /home/yangjihong/.perfconfig not owned by current user or root, ignoring it. Couldn't synthesize bpf events. Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221022092735.114967-4-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3f2ae19a1dd4..658170b8dcef 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -556,7 +556,7 @@ static char *home_perfconfig(void) config = strdup(mkpath("%s/.perfconfig", home)); if (config == NULL) { - pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home); return NULL; } @@ -564,7 +564,7 @@ static char *home_perfconfig(void) goto out_free; if (st.st_uid && (st.st_uid != geteuid())) { - pr_warning("File %s not owned by current user or root, ignoring it.", config); + pr_warning("File %s not owned by current user or root, ignoring it.\n", config); goto out_free; } -- cgit v1.2.3 From 743ef218c2fbe63502615a2044977041ee068322 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:25:18 -0300 Subject: perf unwind arm64: Remove needless event.h & thread.h includes To reduce compile time and header dependency chains just add forward declarations for pointer types and include linux/types.h for u64. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm64-frame-pointer-unwind-support.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h index 32af9ce94398..42d3a45490f5 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.h +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h @@ -2,8 +2,10 @@ #ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H #define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H -#include "event.h" -#include "thread.h" +#include + +struct perf_sample; +struct thread; u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx); -- cgit v1.2.3 From 6bc13cab5798bd9b049694983ae5702666d24e83 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:32:19 -0300 Subject: perf arch x86: Add missing stdlib.h to get free() prototype It was getting indirectly, out of luck, add it. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/event.c | 1 + tools/perf/arch/x86/util/tsc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index e670f3547581..55ff6aec10fd 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "../../../util/event.h" #include "../../../util/synthetic-events.h" diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index eb2b5195bd02..9b99f48b923c 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "../../../util/debug.h" #include "../../../util/tsc.h" -- cgit v1.2.3 From 06bf28cbc63287c69fe834b527127a56b65de2d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:33:04 -0300 Subject: perf scripting python: Add missing util/perf_regs.h include to get perf_reg_name() prototype It was getting it via event.h, that doesn't need that include anymore and will drop it. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7bc8559dce6a..1985d1a42a22 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -52,6 +52,7 @@ #include "print_binary.h" #include "stat.h" #include "mem-events.h" +#include "util/perf_regs.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ -- cgit v1.2.3 From b15cf900d11d4db2c2dac544a27f3e1217bdd0d4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2022 17:34:44 -0300 Subject: perf event: Drop perf_regs.h include, not needed anymore Since commit c897899752478d4c ("perf tools: Prevent out-of-bounds access to registers") the util/event.h header doesn't use anything from util/perf_regs.h, so drop it to untangle the header dependency tree a bit, speeding up compilation. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 12eae6917022..65495f6945b4 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -12,8 +12,6 @@ #include #include -#include "perf_regs.h" - struct dso; struct machine; struct perf_event_attr; -- cgit v1.2.3 From ad7ad6b5ddf63b436a5344fb686887f2d8b7cf3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 20 Oct 2022 18:25:09 +0300 Subject: perf scripts python: intel-pt-events.py: Add ability interleave output Intel PT timestamps are not provided for every branch, let alone every instruction, so there can be many samples with the same timestamp. With per-cpu contexts, decoding is done for each CPU in turn, which can make it difficult to see what is happening on different CPUs at the same time. Currently the interleaving from perf script --itrace=i0ns is quite coarse grained. There are often long stretches executing on one CPU and nothing on another. Some people are interested in seeing what happened on multiple CPUs before a crash to debug races etc. To improve perf script interleaving for parallel execution, the intel-pt-events.py script has been enhanced to enable interleaving the output with the same timestamp from different CPUs. It is understood that interleaving is not perfect or causal. Add parameter --interleave [] to interleave sample output for the same timestamp so that no more than n samples for a CPU are displayed in a row. 'n' defaults to 4. Note this only affects the order of output, and only when the timestamp is the same. Example: $ perf script intel-pt-events.py --insn-trace --interleave 3 ... bash 2267/2267 [004] 9323.692625625 563caa3c86f0 jz 0x563caa3c89c7 run_pending_traps+0x30 (/usr/bin/bash) IPC: 1.52 (38/25) bash 2267/2267 [004] 9323.692625625 563caa3c89c7 movq 0x118(%rsp), %rax run_pending_traps+0x307 (/usr/bin/bash) bash 2267/2267 [004] 9323.692625625 563caa3c89cf subq %fs:0x28, %rax run_pending_traps+0x30f (/usr/bin/bash) bash 2270/2270 [007] 9323.692625625 55dc58cabf02 jz 0x55dc58cabf48 unquoted_glob_pattern_p+0x102 (/usr/bin/bash) IPC: 1.56 (25/16) bash 2270/2270 [007] 9323.692625625 55dc58cabf04 cmp $0x5d, %al unquoted_glob_pattern_p+0x104 (/usr/bin/bash) bash 2270/2270 [007] 9323.692625625 55dc58cabf06 jnz 0x55dc58cabf10 unquoted_glob_pattern_p+0x106 (/usr/bin/bash) bash 2264/2264 [001] 9323.692625625 7fd556a4376c jbe 0x7fd556a43ac8 round_and_return+0x3fc (/usr/lib/x86_64-linux-gnu/libc.so.6) IPC: 4.30 (43/10) bash 2264/2264 [001] 9323.692625625 7fd556a43772 and $0x8, %edx round_and_return+0x402 (/usr/lib/x86_64-linux-gnu/libc.so.6) bash 2264/2264 [001] 9323.692625625 7fd556a43775 jnz 0x7fd556a43ac8 round_and_return+0x405 (/usr/lib/x86_64-linux-gnu/libc.so.6) bash 2267/2267 [004] 9323.692625625 563caa3c89d8 jnz 0x563caa3c8b11 run_pending_traps+0x318 (/usr/bin/bash) bash 2267/2267 [004] 9323.692625625 563caa3c89de add $0x128, %rsp run_pending_traps+0x31e (/usr/bin/bash) bash 2267/2267 [004] 9323.692625625 563caa3c89e5 popq %rbx run_pending_traps+0x325 (/usr/bin/bash) ... Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20221020152509.5298-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 12 ++++- tools/perf/scripts/python/intel-pt-events.py | 65 +++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 92464a5d7eaf..7b6ccd2fa3bf 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -189,8 +189,16 @@ There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. The script also displays branches, and supports 2 additional modes selected by option: - --insn-trace - instruction trace - --src-trace - source trace + - --insn-trace - instruction trace + - --src-trace - source trace + +The intel-pt-events.py script also has options: + + - --all-switch-events - display all switch events, not only the last consecutive. + - --interleave [] - interleave sample output for the same timestamp so that + no more than n samples for a CPU are displayed in a row. 'n' defaults to 4. + Note this only affects the order of output, and only when the timestamp is the + same. As mentioned above, it is easy to capture too much data. One way to limit the data captured is to use 'snapshot' mode which is explained further below. diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py index 6be7fd8fd615..08862a2582f4 100644 --- a/tools/perf/scripts/python/intel-pt-events.py +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -13,10 +13,12 @@ from __future__ import print_function +import io import os import sys import struct import argparse +import contextlib from libxed import LibXED from ctypes import create_string_buffer, addressof @@ -39,6 +41,11 @@ glb_src = False glb_source_file_name = None glb_line_number = None glb_dso = None +glb_stash_dict = {} +glb_output = None +glb_output_pos = 0 +glb_cpu = -1 +glb_time = 0 def get_optional_null(perf_dict, field): if field in perf_dict: @@ -70,6 +77,7 @@ def trace_begin(): ap.add_argument("--insn-trace", action='store_true') ap.add_argument("--src-trace", action='store_true') ap.add_argument("--all-switch-events", action='store_true') + ap.add_argument("--interleave", type=int, nargs='?', const=4, default=0) global glb_args global glb_insn global glb_src @@ -94,11 +102,39 @@ def trace_begin(): perf_set_itrace_options(perf_script_context, itrace) def trace_end(): + if glb_args.interleave: + flush_stashed_output() print("End") def trace_unhandled(event_name, context, event_fields_dict): print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) +def stash_output(): + global glb_stash_dict + global glb_output_pos + output_str = glb_output.getvalue()[glb_output_pos:] + n = len(output_str) + if n: + glb_output_pos += n + if glb_cpu not in glb_stash_dict: + glb_stash_dict[glb_cpu] = [] + glb_stash_dict[glb_cpu].append(output_str) + +def flush_stashed_output(): + global glb_stash_dict + while glb_stash_dict: + cpus = list(glb_stash_dict.keys()) + # Output at most glb_args.interleave output strings per cpu + for cpu in cpus: + items = glb_stash_dict[cpu] + countdown = glb_args.interleave + while len(items) and countdown: + sys.stdout.write(items[0]) + del items[0] + countdown -= 1 + if not items: + del glb_stash_dict[cpu] + def print_ptwrite(raw_buf): data = struct.unpack_from("= 2 and x[0]: machine_pid = x[0] vcpu = x[1] @@ -403,6 +464,8 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): sys.exit(1) def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): + if glb_args.interleave: + flush_stashed_output() if out: out_str = "Switch out " else: -- cgit v1.2.3 From 439dbef2a94e825241160e1bbd050d01e5728608 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:36 -0700 Subject: perf test: Do not use instructions:u explicitly I think it's to support non-root user tests. But perf record can handle the case and fall back to a software event (cpu-clock). Practically this would affect when it's run on a VM, but it seems no reason to prevent running the test in the guest. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 301f95427159..747c33a1ec45 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -21,18 +21,18 @@ trap trap_cleanup exit term int test_per_thread() { echo "Basic --per-thread mode test" - if ! perf record -e instructions:u -o ${perfdata} --quiet true 2> /dev/null + if ! perf record -o /dev/null --quiet true 2> /dev/null then - echo "Per-thread record [Skipped instructions:u not supported]" + echo "Per-thread record [Skipped event not supported]" if [ $err -ne 1 ] then err=2 fi return fi - if ! perf record -e instructions:u --per-thread -o ${perfdata} true 2> /dev/null + if ! perf record --per-thread -o ${perfdata} true 2> /dev/null then - echo "Per-thread record of instructions:u [Failed]" + echo "Per-thread record [Failed record]" err=1 return fi @@ -49,7 +49,7 @@ test_register_capture() { echo "Register capture test" if ! perf list | egrep -q 'br_inst_retired.near_call' then - echo "Register capture test [Skipped missing instruction]" + echo "Register capture test [Skipped missing event]" if [ $err -ne 1 ] then err=2 -- cgit v1.2.3 From 9e455f4f29e3aeab29b6c1f7d086ddc43fc1a502 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:37 -0700 Subject: perf test: Fix shellcheck issues in the record test Basically there are 3 issues: 1. quote shell expansion 2. do not use egrep 3. use upper case letters for signal names Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 747c33a1ec45..464071462809 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -8,16 +8,16 @@ err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) cleanup() { - rm -f ${perfdata} - rm -f ${perfdata}.old - trap - exit term int + rm -f "${perfdata}" + rm -f "${perfdata}".old + trap - EXIT TERM INT } trap_cleanup() { cleanup exit 1 } -trap trap_cleanup exit term int +trap trap_cleanup EXIT TERM INT test_per_thread() { echo "Basic --per-thread mode test" @@ -30,13 +30,13 @@ test_per_thread() { fi return fi - if ! perf record --per-thread -o ${perfdata} true 2> /dev/null + if ! perf record --per-thread -o "${perfdata}" true 2> /dev/null then echo "Per-thread record [Failed record]" err=1 return fi - if ! perf report -i ${perfdata} -q | egrep -q true + if ! perf report -i "${perfdata}" -q | grep -q true then echo "Per-thread record [Failed missing output]" err=1 @@ -47,7 +47,7 @@ test_per_thread() { test_register_capture() { echo "Register capture test" - if ! perf list | egrep -q 'br_inst_retired.near_call' + if ! perf list | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" if [ $err -ne 1 ] @@ -56,7 +56,7 @@ test_register_capture() { fi return fi - if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' + if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' then echo "Register capture test [Skipped missing registers]" return @@ -64,7 +64,7 @@ test_register_capture() { if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ -c 1000 --per-thread true 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ - | egrep -q "DI:" + | grep -q "DI:" then echo "Register capture test [Failed missing output]" err=1 -- cgit v1.2.3 From 4321ad4ee98b7325d6133e1d5b7fa25bcbdeb57e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:38 -0700 Subject: perf test: Use a test program in 'perf record' tests If the system has cc it could build a test program with two threads and then use it for more detailed testing. Also it accepts an option to run a thread forever to ensure multi-thread runs. If cc is not found, it falls back to use the default value 'true'. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 64 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 464071462809..952981481239 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -6,10 +6,17 @@ set -e err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) +testsym="test_loop" cleanup() { rm -f "${perfdata}" rm -f "${perfdata}".old + + if [ "${testprog}" != "true" ]; then + rm -f "${testprog}" + fi + trap - EXIT TERM INT } @@ -19,9 +26,56 @@ trap_cleanup() { } trap trap_cleanup EXIT TERM INT +build_test_program() { + if ! [ -x "$(command -v cc)" ]; then + # No CC found. Fall back to 'true' + testprog=true + testsym=true + return + fi + + echo "Build a test program" + cat < +#include +#include + +void test_loop(void) { + volatile int count = 1000000; + + while (count--) + continue; +} + +void *thfunc(void *arg) { + int forever = *(int *)arg; + + do { + test_loop(); + } while (forever); + + return NULL; +} + +int main(int argc, char *argv[]) { + pthread_t th; + int forever = 0; + + if (argc > 1) + forever = atoi(argv[1]); + + pthread_create(&th, NULL, thfunc, &forever); + test_loop(); + pthread_join(th, NULL); + + return 0; +} +EOF +} + test_per_thread() { echo "Basic --per-thread mode test" - if ! perf record -o /dev/null --quiet true 2> /dev/null + if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null then echo "Per-thread record [Skipped event not supported]" if [ $err -ne 1 ] @@ -30,13 +84,13 @@ test_per_thread() { fi return fi - if ! perf record --per-thread -o "${perfdata}" true 2> /dev/null + if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null then echo "Per-thread record [Failed record]" err=1 return fi - if ! perf report -i "${perfdata}" -q | grep -q true + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" then echo "Per-thread record [Failed missing output]" err=1 @@ -62,7 +116,7 @@ test_register_capture() { return fi if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ - -c 1000 --per-thread true 2> /dev/null \ + -c 1000 --per-thread ${testprog} 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ | grep -q "DI:" then @@ -73,6 +127,8 @@ test_register_capture() { echo "Register capture test [Success]" } +build_test_program + test_per_thread test_register_capture -- cgit v1.2.3 From 6b7e02ab1262141cfebd05b68410fa297f557961 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:39 -0700 Subject: perf test: Wait for a new thread when testing --per-thread record Just running the target program is not enough to test multi-thread target because it'd be racy perf vs target startup. I used the initial delay but it cannot guarantee for perf to see the thread. Instead, use wait_for_threads helper from shell/lib/waiting.sh to make sure it starts the sibling thread first. Then perf record can use -p option to profile the target process. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 952981481239..d1640d1daf2e 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -4,6 +4,9 @@ set -e +shelldir=$(dirname "$0") +. "${shelldir}"/lib/waiting.sh + err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) @@ -96,6 +99,30 @@ test_per_thread() { err=1 return fi + + # run the test program in background (forever) + ${testprog} 1 & + TESTPID=$! + + rm -f "${perfdata}" + + wait_for_threads ${TESTPID} 2 + perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null + kill ${TESTPID} + + if [ ! -e "${perfdata}" ] + then + echo "Per-thread record [Failed record -p]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Per-thread record [Failed -p missing output]" + err=1 + return + fi + echo "Basic --per-thread mode test [Success]" } -- cgit v1.2.3 From 2cadf2c7b99a980455500c34571a540300c49c72 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:40 -0700 Subject: perf test: Add system-wide mode in 'perf record' tests Add system wide recording test with the same pattern. It'd skip the test when it fails to run 'perf record'. For system-wide mode, it needs to avoid build-id collection and synthesis because the test only cares about the test program and kernel would generate the necessary events as the process starts. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index d1640d1daf2e..345764afb745 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -154,10 +154,31 @@ test_register_capture() { echo "Register capture test [Success]" } +test_system_wide() { + echo "Basic --system-wide mode test" + if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "System-wide record [Skipped not supported]" + if [ $err -ne 1 ] + then + err=2 + fi + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "System-wide record [Failed missing output]" + err=1 + return + fi + echo "Basic --system-wide mode test [Success]" +} + build_test_program test_per_thread test_register_capture +test_system_wide cleanup exit $err -- cgit v1.2.3 From c8c935677487ba6f7dd18d48e39ab30bbb4cb5d9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:41 -0700 Subject: perf test: Add target workload test in 'perf record' tests Add a subtest which profiles the given workload on the command line. As it's a minimal requirement, the test should run ok so it doesn't skip the test even if it failed to run the 'perf record' command. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 345764afb745..c59d1459c960 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -174,11 +174,29 @@ test_system_wide() { echo "Basic --system-wide mode test [Success]" } +test_workload() { + echo "Basic target workload test" + if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed missing output]" + err=1 + return + fi + echo "Basic target workload test [Success]" +} + build_test_program test_per_thread test_register_capture test_system_wide +test_workload cleanup exit $err -- cgit v1.2.3 From 7f4ed3f0b1fc951bcb91a78bf68ac45aac13ec0b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:42 -0700 Subject: perf test: Test record with --threads option The --threads option changed the 'perf record' behavior significantly, so it'd be nice if we test it separately. Add --threads options with different argument in each test supported and check the result. Also update the cleanup routine because threads recording produces data in a directory. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index c59d1459c960..01aa9531b369 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -13,8 +13,8 @@ testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) testsym="test_loop" cleanup() { - rm -f "${perfdata}" - rm -f "${perfdata}".old + rm -rf "${perfdata}" + rm -rf "${perfdata}".old if [ "${testprog}" != "true" ]; then rm -f "${testprog}" @@ -171,6 +171,19 @@ test_system_wide() { err=1 return fi + if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \ + -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "System-wide record [Failed record --threads option]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "System-wide record [Failed --threads missing output]" + err=1 + return + fi echo "Basic --system-wide mode test [Success]" } @@ -188,6 +201,19 @@ test_workload() { err=1 return fi + if ! perf record -e cpu-clock,cs --threads=package \ + -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record --threads option]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed --threads missing output]" + err=1 + return + fi echo "Basic target workload test [Success]" } -- cgit v1.2.3 From 8b380e6afd124d18cd51a43d2505e4eb05e2ba09 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Oct 2022 10:26:43 -0700 Subject: perf test: Do not set TEST_SKIP for record subtests It now has 4 sub tests and at least one of them should run. But once the TEST_SKIP (= 2) return value is set, it won't be overwritten unless there's a failure. I think we should return success when one or more tests are skipped but the remaining subtests are passed. So update the test code not to set the err variable when it skips the test. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221020172643.3458767-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 01aa9531b369..e93b3a8871fe 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -81,10 +81,6 @@ test_per_thread() { if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null then echo "Per-thread record [Skipped event not supported]" - if [ $err -ne 1 ] - then - err=2 - fi return fi if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null @@ -131,10 +127,6 @@ test_register_capture() { if ! perf list | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" - if [ $err -ne 1 ] - then - err=2 - fi return fi if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' @@ -159,10 +151,6 @@ test_system_wide() { if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null then echo "System-wide record [Skipped not supported]" - if [ $err -ne 1 ] - then - err=2 - fi return fi if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" -- cgit v1.2.3 From 65319890c32db29fb56b41f84265a2c7029943f4 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 18 Oct 2022 10:41:35 +0100 Subject: perf tools: Fix "kernel lock contention analysis" test by not printing warnings in quiet mode Especially when CONFIG_LOCKDEP and other debug configs are enabled, Perf can print the following warning when running the "kernel lock contention analysis" test: Warning: Processed 1378918 events and lost 4 chunks! Check IO/CPU overload! Warning: Processed 4593325 samples and lost 70.00%! The test already supplies -q to run in quiet mode, so extend quiet mode to perf_stdio__warning() and also ui__warning() for consistency. This fixes the following failure due to the extra lines counted: perf test "lock cont" -vvv 82: kernel lock contention analysis test : --- start --- test child forked, pid 3125 Testing perf lock record and perf lock contention [Fail] Recorded result count is not 1: 9 test child finished with -1 ---- end ---- kernel lock contention analysis test: FAILED! Fixes: ec685de25b6718f8 ("perf test: Add kernel lock contention test") Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221018094137.783081-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/util.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c index 689b27c34246..1d38ddf01b60 100644 --- a/tools/perf/ui/util.c +++ b/tools/perf/ui/util.c @@ -15,6 +15,9 @@ static int perf_stdio__error(const char *format, va_list args) static int perf_stdio__warning(const char *format, va_list args) { + if (quiet) + return 0; + fprintf(stderr, "Warning:\n"); vfprintf(stderr, format, args); return 0; @@ -45,6 +48,8 @@ int ui__warning(const char *format, ...) { int ret; va_list args; + if (quiet) + return 0; va_start(args, format); ret = perf_eops->warning(format, args); -- cgit v1.2.3 From a527c2c1e2d43e9f145f5d0c5d6ac0bdf5220e22 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 18 Oct 2022 10:41:36 +0100 Subject: perf tools: Make quiet mode consistent between tools Use the global quiet variable everywhere so that all tools hide warnings in quiet mode and update the documentation to reflect this. 'perf probe' claimed that errors are not printed in quiet mode but I don't see this so remove it from the docs. Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221018094137.783081-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 2 +- tools/perf/Documentation/perf-diff.txt | 2 +- tools/perf/Documentation/perf-lock.txt | 2 +- tools/perf/Documentation/perf-probe.txt | 2 +- tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/perf-stat.txt | 4 ++-- tools/perf/bench/numa.c | 9 +++++---- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-lock.c | 2 +- tools/perf/builtin-probe.c | 7 +++---- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 8 ++++---- tools/perf/util/stat.h | 1 - 16 files changed, 25 insertions(+), 26 deletions(-) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 18fcc52809fb..980fe2c29275 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -41,7 +41,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -n:: --show-nr-samples:: diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index be65bd55ab2a..f3067a4af294 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -75,7 +75,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -f:: --force:: diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 3b1e16563b79..4958a1ffa1cc 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -42,7 +42,7 @@ COMMON OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -D:: --dump-raw-trace:: diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 080981d38d7b..7f8e8ba3a787 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -57,7 +57,7 @@ OPTIONS -q:: --quiet:: - Be quiet (do not show any messages including errors). + Do not show any warnings or messages. Can not use with -v. -a:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e41ae950fdc3..9ea6d44aca58 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -282,7 +282,7 @@ OPTIONS -q:: --quiet:: - Don't print any message, useful for scripting. + Don't print any warnings or messages, useful for scripting. -v:: --verbose:: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4533db2ee56b..4fa509b15948 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -27,7 +27,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -n:: --show-nr-samples:: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index d7ff1867feda..18abdc1dce05 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -354,8 +354,8 @@ forbids the event merging logic from sharing events between groups and may be used to increase accuracy in this case. --quiet:: -Don't print output. This is useful with perf stat record below to only -write data to the perf.data file. +Don't print output, warnings or messages. This is useful with perf stat +record below to only write data to the perf.data file. STAT RECORD ----------- diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index e78dedf9e682..9717c6c17433 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,6 @@ struct params { long bytes_thread; int nr_tasks; - bool show_quiet; bool show_convergence; bool measure_convergence; @@ -197,7 +197,8 @@ static const struct option options[] = { OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " "convergence is reached when each process (all its threads) is running on a single NUMA node."), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), - OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), + OPT_BOOLEAN('q', "quiet" , &quiet, + "quiet mode (do not show any warnings or messages)"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), /* Special option string parsing callbacks: */ @@ -1474,7 +1475,7 @@ static int init(void) /* char array in count_process_nodes(): */ BUG_ON(g->p.nr_nodes < 0); - if (g->p.show_quiet && !g->p.show_details) + if (quiet && !g->p.show_details) g->p.show_details = -1; /* Some memory should be specified: */ @@ -1553,7 +1554,7 @@ static void print_res(const char *name, double val, if (!name) name = "main,"; - if (!g->p.show_quiet) + if (!quiet) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); else printf(" %14.3f %s\n", val, txt_long); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f839e69492e8..517d928c00e3 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -525,7 +525,7 @@ int cmd_annotate(int argc, const char **argv) OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "do now show any warnings or messages"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), #ifdef HAVE_GTK2_SUPPORT diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index d925096dd7f0..ed07cc6cca56 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1260,7 +1260,7 @@ static const char * const diff_usage[] = { static const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 9722d4ab2e55..66520712a167 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1869,7 +1869,7 @@ int cmd_lock(int argc, const char **argv) "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_END() }; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index f62298f5db3b..2ae50fc9e597 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -40,7 +40,6 @@ static struct { int command; /* Command short_name */ bool list_events; bool uprobes; - bool quiet; bool target_used; int nevents; struct perf_probe_event events[MAX_PROBES]; @@ -514,8 +513,8 @@ __cmd_probe(int argc, const char **argv) struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show parsed arguments, etc)"), - OPT_BOOLEAN('q', "quiet", ¶ms.quiet, - "be quiet (do not show any messages)"), + OPT_BOOLEAN('q', "quiet", &quiet, + "be quiet (do not show any warnings or messages)"), OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT", "list up probe events", opt_set_filter_with_command, DEFAULT_LIST_FILTER), @@ -634,7 +633,7 @@ __cmd_probe(int argc, const char **argv) if (ret) return ret; - if (params.quiet) { + if (quiet) { if (verbose != 0) { pr_err(" Error: -v and -q are exclusive.\n"); return -EINVAL; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e128b855ddde..59f3d98a0196 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3388,7 +3388,7 @@ static struct option __record_options[] = { &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8361890176c2..b6d77d3da64f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1222,7 +1222,7 @@ int cmd_report(int argc, const char **argv) "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9d35a3338976..e52601a54b26 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1033,7 +1033,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; - if (stat_config.quiet) + if (quiet) return; evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); @@ -1283,8 +1283,8 @@ static struct option stat_options[] = { "print summary for interval mode"), OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, "don't print 'summary' for CSV summary output"), - OPT_BOOLEAN(0, "quiet", &stat_config.quiet, - "don't print output (useful with record)"), + OPT_BOOLEAN(0, "quiet", &quiet, + "don't print any output, messages or warnings (useful with record)"), OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", "Only enable events on applying cpu with this type " "for hybrid platform (e.g. core or atom)", @@ -2383,7 +2383,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (!output && !stat_config.quiet) { + if (!output && !quiet) { struct timespec tm; mode = append_file ? "a" : "w"; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 382a1ab92ce1..499c3bf81333 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -160,7 +160,6 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool stop_read_counter; - bool quiet; bool iostat_run; char *user_requested_cpu_list; bool system_wide; -- cgit v1.2.3 From cff624146450bd25a1acea0439b7654167e0f722 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 13:50:17 -0300 Subject: perf bpf: No need to include compiler.h when HAVE_LIBBPF_SUPPORT is true Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_map.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h index d6abd5e47af8..c2f7c13cba23 100644 --- a/tools/perf/util/bpf_map.h +++ b/tools/perf/util/bpf_map.h @@ -3,7 +3,6 @@ #define __PERF_BPF_MAP_H 1 #include -#include struct bpf_map; #ifdef HAVE_LIBBPF_SUPPORT @@ -12,6 +11,8 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp); #else +#include + static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused) { return 0; -- cgit v1.2.3 From 8d0d129e94d4518fd17c13b4991ff10b7f4cd85a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 13:52:17 -0300 Subject: perf bpf: No need to include headers just use forward declarations In the bpf-prologue.h header we are just using pointers, so no need to include headers for that, just provide forward declarations for those types. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-prologue.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h index c50c7358009f..66dcf751ef65 100644 --- a/tools/perf/util/bpf-prologue.h +++ b/tools/perf/util/bpf-prologue.h @@ -6,9 +6,8 @@ #ifndef __BPF_PROLOGUE_H #define __BPF_PROLOGUE_H -#include -#include -#include "probe-event.h" +struct probe_trace_arg; +struct bpf_insn; #define BPF_PROLOGUE_MAX_ARGS 3 #define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 @@ -19,6 +18,7 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, struct bpf_insn *new_prog, size_t *new_cnt, size_t cnt_space); #else +#include #include static inline int -- cgit v1.2.3 From 08043330167f1e21abe60ff7e124ed87d4fd029d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 17:02:23 -0300 Subject: perf branch: Remove some needless headers, add a needed one map_symbol.h is needed because we have structs that contains 'struct addr_map_symbol', so add it, remove the others. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/branch.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index f838b23db180..94f36a187fe7 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -7,11 +7,9 @@ * detected in at least musl libc, used in Alpine Linux. -acme */ #include -#include -#include -#include #include #include +#include "util/map_symbol.h" #include "event.h" struct branch_flags { -- cgit v1.2.3 From 9823147da6c893d9295949e5ed982a8630deb6db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Oct 2022 17:24:27 -0300 Subject: perf tools: Move 'struct perf_sample' to a separate header file to disentangle headers Some places were including event.h just to get 'struct perf_sample', move it to a separate place so that we speed up a bit the build. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/unwind-libdw.c | 2 +- tools/perf/arch/arm64/util/machine.c | 1 + tools/perf/arch/arm64/util/unwind-libdw.c | 2 +- tools/perf/arch/powerpc/util/event.c | 1 + tools/perf/arch/powerpc/util/unwind-libdw.c | 2 +- tools/perf/arch/s390/util/unwind-libdw.c | 1 + tools/perf/arch/x86/tests/sample-parsing.c | 1 + tools/perf/arch/x86/util/event.c | 1 + tools/perf/arch/x86/util/unwind-libdw.c | 2 +- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-mem.c | 1 + tools/perf/tests/dlfilter-test.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 1 + tools/perf/tests/parse-no-sample-id-all.c | 1 + tools/perf/tests/perf-record.c | 1 + tools/perf/tests/perf-time-to-tsc.c | 1 + tools/perf/tests/sw-clock.c | 1 + tools/perf/tests/switch-tracking.c | 1 + tools/perf/util/amd-sample-raw.c | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/branch.h | 2 +- tools/perf/util/data-convert-bt.c | 1 + tools/perf/util/event.h | 111 +------------------ tools/perf/util/evlist.c | 1 + .../util/intel-pt-decoder/intel-pt-insn-decoder.c | 1 + tools/perf/util/perf_regs.c | 2 +- tools/perf/util/s390-cpumsf.c | 1 + tools/perf/util/s390-sample-raw.c | 1 + tools/perf/util/sample.h | 117 +++++++++++++++++++++ tools/perf/util/trace-event-scripting.c | 2 +- 31 files changed, 147 insertions(+), 118 deletions(-) create mode 100644 tools/perf/util/sample.h diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c index b7692cb0c733..1834a0cd9ce3 100644 --- a/tools/perf/arch/arm/util/unwind-libdw.c +++ b/tools/perf/arch/arm/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 41c1596e5207..235a0a1e1ec7 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -7,6 +7,7 @@ #include "symbol.h" #include "callchain.h" #include "record.h" +#include "util/perf_regs.h" void arch__add_leaf_frame_record_opts(struct record_opts *opts) { diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c index a50941629649..09385081bb03 100644 --- a/tools/perf/arch/arm64/util/unwind-libdw.c +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index cf430a4c55b9..77d8cc2b5691 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -9,6 +9,7 @@ #include "../../../util/tool.h" #include "../../../util/map.h" #include "../../../util/debug.h" +#include "../../../util/sample.h" void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c index 7b2d96ec28e3..e616642c754c 100644 --- a/tools/perf/arch/powerpc/util/unwind-libdw.c +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -3,7 +3,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */ static const int special_regs[3][2] = { diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c index 387c698cdd1b..7d92452d5287 100644 --- a/tools/perf/arch/s390/util/unwind-libdw.c +++ b/tools/perf/arch/s390/util/unwind-libdw.c @@ -3,6 +3,7 @@ #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" #include "../../util/event.h" +#include "../../util/sample.h" #include "dwarf-regs-table.h" diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index bfbd3662b69e..690c7c07e90d 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -10,6 +10,7 @@ #include "event.h" #include "evsel.h" #include "debug.h" +#include "util/sample.h" #include "util/synthetic-events.h" #include "tests/tests.h" diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 55ff6aec10fd..a3acefe6d0c6 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -10,6 +10,7 @@ #include "../../../util/tool.h" #include "../../../util/map.h" #include "../../../util/debug.h" +#include "util/sample.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c index eea2bf87232b..ef71e8bf80bf 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 17672790f123..4561bda0ce6a 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -19,10 +19,10 @@ #include "util/data.h" #include "util/stat.h" #include "util/debug.h" -#include "util/event.h" #include "util/symbol.h" #include "util/session.h" #include "util/build-id.h" +#include "util/sample.h" #include "util/synthetic-events.h" #define MMAP_DEV_MAJOR 8 diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 923fb8316fda..dedd612eae5e 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -20,6 +20,7 @@ #include "util/symbol.h" #include "util/pmu.h" #include "util/pmu-hybrid.h" +#include "util/sample.h" #include "util/string2.h" #include diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 84352d55347d..99aa72e425e4 100644 --- a/tools/perf/tests/dlfilter-test.c +++ b/tools/perf/tests/dlfilter-test.c @@ -33,6 +33,7 @@ #include "archinsn.h" #include "dlfilter.h" #include "tests.h" +#include "util/sample.h" #define MAP_START 0x400000 diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 8322fc2295fa..6377906c1318 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -10,6 +10,7 @@ #include "thread_map.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" #include #include #include diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index a7b2800652e4..888df8eca981 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -14,6 +14,7 @@ #include "util/mmap.h" #include #include +#include "util/sample.h" #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index d62e31595ab2..202f0a9a6796 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "header.h" #include "debug.h" +#include "util/sample.h" static int process_event(struct evlist **pevlist, union perf_event *event) { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 7aa946aa886d..d82539e2ae64 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -11,6 +11,7 @@ #include "record.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) { diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index c3aaa1ddff29..efcd71c2738a 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,6 +20,7 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" +#include "util/sample.h" /* * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 9cd6fec375ee..4d7493fa0105 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -13,6 +13,7 @@ #include "util/evlist.h" #include "util/cpumap.h" #include "util/mmap.h" +#include "util/sample.h" #include "util/thread_map.h" #include #include diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 87f565c7f650..b3bd14b025a8 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -19,6 +19,7 @@ #include "record.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" #include "pmu.h" static int spin_sleep(void) diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 238305868644..b0e70ce9d87a 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -16,6 +16,7 @@ #include "evlist.h" #include "sample-raw.h" #include "pmu-events/pmu-events.h" +#include "util/sample.h" static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; static bool zen4_ibs_extensions; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 46ada5ec3f9a..265d20cc126b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -59,6 +59,7 @@ #include #include "symbol/kallsyms.h" #include +#include "util/sample.h" /* * Make a group from 'leader' to 'last', requiring that the events were not diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 94f36a187fe7..d6017c9b1872 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -10,7 +10,7 @@ #include #include #include "util/map_symbol.h" -#include "event.h" +#include "util/sample.h" struct branch_flags { union { diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 9e0aee276df8..c65cdaf6975e 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -34,6 +34,7 @@ #include #include "util.h" #include "clockid.h" +#include "util/sample.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 65495f6945b4..8b71ac1af81b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -15,6 +15,7 @@ struct dso; struct machine; struct perf_event_attr; +struct perf_sample; #ifdef __LP64__ /* @@ -42,61 +43,6 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) -/* number of register is bound by the number of bits in regs_dump::mask (64) */ -#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) - -struct regs_dump { - u64 abi; - u64 mask; - u64 *regs; - - /* Cached values/mask filled by first register access. */ - u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; - u64 cache_mask; -}; - -struct stack_dump { - u16 offset; - u64 size; - char *data; -}; - -struct sample_read_value { - u64 value; - u64 id; /* only if PERF_FORMAT_ID */ - u64 lost; /* only if PERF_FORMAT_LOST */ -}; - -struct sample_read { - u64 time_enabled; - u64 time_running; - union { - struct { - u64 nr; - struct sample_read_value *values; - } group; - struct sample_read_value one; - }; -}; - -static inline size_t sample_read_value_size(u64 read_format) -{ - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_LOST) - return sizeof(struct sample_read_value); - else - return offsetof(struct sample_read_value, lost); -} - -static inline struct sample_read_value * -next_sample_read_value(struct sample_read_value *v, u64 read_format) -{ - return (void *)v + sample_read_value_size(read_format); -} - -#define sample_read_group__for_each(v, nr, rf) \ - for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) - struct ip_callchain { u64 nr; u64 ips[]; @@ -138,52 +84,6 @@ enum { PERF_IP_FLAG_VMENTRY |\ PERF_IP_FLAG_VMEXIT) -#define MAX_INSN 16 - -struct aux_sample { - u64 size; - void *data; -}; - -struct perf_sample { - u64 ip; - u32 pid, tid; - u64 time; - u64 addr; - u64 id; - u64 stream_id; - u64 period; - u64 weight; - u64 transaction; - u64 insn_cnt; - u64 cyc_cnt; - u32 cpu; - u32 raw_size; - u64 data_src; - u64 phys_addr; - u64 data_page_size; - u64 code_page_size; - u64 cgroup; - u32 flags; - u32 machine_pid; - u32 vcpu; - u16 insn_len; - u8 cpumode; - u16 misc; - u16 ins_lat; - u16 p_stage_cyc; - bool no_hw_idx; /* No hw_idx collected in branch_stack */ - char insn[MAX_INSN]; - void *raw_data; - struct ip_callchain *callchain; - struct branch_stack *branch_stack; - struct regs_dump user_regs; - struct regs_dump intr_regs; - struct stack_dump user_stack; - struct sample_read read; - struct aux_sample aux_sample; -}; - #define PERF_MEM_DATA_SRC_NONE \ (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ @@ -342,15 +242,6 @@ struct perf_synth_intel_iflag_chg { u64 branch_ip; /* If via_branch */ }; -/* - * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get - * 8-byte alignment. - */ -static inline void *perf_sample__synth_ptr(struct perf_sample *sample) -{ - return sample->raw_data - 4; -} - static inline void *perf_synth__raw_data(void *p) { return p + 4; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6612b00949e7..112850d629cb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -29,6 +29,7 @@ #include "util/evsel_fprintf.h" #include "util/evlist-hybrid.h" #include "util/pmu.h" +#include "util/sample.h" #include #include #include diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 1376077183f7..22308dd93010 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -18,6 +18,7 @@ #include "intel-pt-insn-decoder.h" #include "dump-insn.h" +#include "util/sample.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 872dd3d38782..57a567ee2cea 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -2,7 +2,7 @@ #include #include #include "perf_regs.h" -#include "event.h" +#include "util/sample.h" int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index f3fdad28a852..6fe478b0b61b 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -163,6 +163,7 @@ #include "s390-cpumsf-kernel.h" #include "s390-cpumcf-kernel.h" #include "config.h" +#include "util/sample.h" struct s390_cpumsf { struct auxtrace auxtrace; diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 9a631d97471c..c10b891dbad6 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -28,6 +28,7 @@ #include "sample-raw.h" #include "s390-cpumcf-kernel.h" #include "pmu-events/pmu-events.h" +#include "util/sample.h" static size_t ctrset_size(struct cf_ctrset_entry *set) { diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h new file mode 100644 index 000000000000..60ec79d4eea4 --- /dev/null +++ b/tools/perf/util/sample.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SAMPLE_H +#define __PERF_SAMPLE_H + +#include +#include + +/* number of register is bound by the number of bits in regs_dump::mask (64) */ +#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) + +struct regs_dump { + u64 abi; + u64 mask; + u64 *regs; + + /* Cached values/mask filled by first register access. */ + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; + u64 cache_mask; +}; + +struct stack_dump { + u16 offset; + u64 size; + char *data; +}; + +struct sample_read_value { + u64 value; + u64 id; /* only if PERF_FORMAT_ID */ + u64 lost; /* only if PERF_FORMAT_LOST */ +}; + +struct sample_read { + u64 time_enabled; + u64 time_running; + union { + struct { + u64 nr; + struct sample_read_value *values; + } group; + struct sample_read_value one; + }; +}; + +static inline size_t sample_read_value_size(u64 read_format) +{ + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_LOST) + return sizeof(struct sample_read_value); + else + return offsetof(struct sample_read_value, lost); +} + +static inline struct sample_read_value *next_sample_read_value(struct sample_read_value *v, u64 read_format) +{ + return (void *)v + sample_read_value_size(read_format); +} + +#define sample_read_group__for_each(v, nr, rf) \ + for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) + +#define MAX_INSN 16 + +struct aux_sample { + u64 size; + void *data; +}; + +struct perf_sample { + u64 ip; + u32 pid, tid; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + u64 period; + u64 weight; + u64 transaction; + u64 insn_cnt; + u64 cyc_cnt; + u32 cpu; + u32 raw_size; + u64 data_src; + u64 phys_addr; + u64 data_page_size; + u64 code_page_size; + u64 cgroup; + u32 flags; + u32 machine_pid; + u32 vcpu; + u16 insn_len; + u8 cpumode; + u16 misc; + u16 ins_lat; + u16 p_stage_cyc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ + char insn[MAX_INSN]; + void *raw_data; + struct ip_callchain *callchain; + struct branch_stack *branch_stack; + struct regs_dump user_regs; + struct regs_dump intr_regs; + struct stack_dump user_stack; + struct sample_read read; + struct aux_sample aux_sample; +}; + +/* + * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get + * 8-byte alignment. + */ +static inline void *perf_sample__synth_ptr(struct perf_sample *sample) +{ + return sample->raw_data - 4; +} + +#endif /* __PERF_SAMPLE_H */ diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 7172ca05265f..636a010d929b 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -12,9 +12,9 @@ #include "debug.h" #include "trace-event.h" -#include "event.h" #include "evsel.h" #include +#include "util/sample.h" struct scripting_context *scripting_context; -- cgit v1.2.3 From 628d69995e66343266475e6afc76192f5878b605 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:49:58 -0300 Subject: perf kwork: Remove includes not needed in kwork.h Leave just some forward declarations for pointers, move the includes to where they are really needed. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kwork.c | 6 ++++++ tools/perf/util/bpf_kwork.c | 3 +++ tools/perf/util/kwork.h | 12 ++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index fb8c63656ad8..4ffbf5908070 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -6,10 +6,15 @@ */ #include "builtin.h" +#include "perf.h" #include "util/data.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/header.h" #include "util/kwork.h" #include "util/debug.h" +#include "util/session.h" #include "util/symbol.h" #include "util/thread.h" #include "util/string2.h" @@ -21,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c index b629dd679d3f..6eb2c78fd7f4 100644 --- a/tools/perf/util/bpf_kwork.c +++ b/tools/perf/util/bpf_kwork.c @@ -7,15 +7,18 @@ #include #include +#include #include #include #include #include "util/debug.h" +#include "util/evsel.h" #include "util/kwork.h" #include +#include #include "util/bpf_skel/kwork_trace.skel.h" diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h index 320c0a6d2e08..53b7327550b8 100644 --- a/tools/perf/util/kwork.h +++ b/tools/perf/util/kwork.h @@ -1,16 +1,16 @@ #ifndef PERF_UTIL_KWORK_H #define PERF_UTIL_KWORK_H -#include "perf.h" - #include "util/tool.h" -#include "util/event.h" -#include "util/evlist.h" -#include "util/session.h" #include "util/time-utils.h" -#include #include +#include +#include +#include + +struct perf_sample; +struct perf_session; enum kwork_class_type { KWORK_CLASS_IRQ, -- cgit v1.2.3 From 7e5c6f2c1aa2daa0d8aca657377450529f381fe6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:54:46 -0300 Subject: perf machine: Move machine__resolve() from event.h Its a machine method, so move it to machine.h, this way some places that were using event.h just to get this prototype may stop doing so and speed up building and disentanble the header dependency graph. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 3 --- tools/perf/util/machine.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8b71ac1af81b..ea5bd1f62b0f 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -337,9 +337,6 @@ int perf_event__process(struct perf_tool *tool, struct addr_location; -int machine__resolve(struct machine *machine, struct addr_location *al, - struct perf_sample *sample); - void addr_location__put(struct addr_location *al); struct thread; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 74935dfaa937..6267c1d6f232 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -305,4 +305,7 @@ int machine__create_extra_kernel_map(struct machine *machine, int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel); +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample); + #endif /* __PERF_MACHINE_H */ -- cgit v1.2.3 From d1e633e4cdc0d06cec82d4772c025f13c3b25a6c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:54:46 -0300 Subject: perf symbol: Move addr_location__put() from event.h Its a addr_location method, so move it to symbol.h, where 'struct addr_location' is, this way some places that were using event.h just to get this prototype may stop doing so and speed up building and disentanble the header dependency graph. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 3 --- tools/perf/util/symbol.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index ea5bd1f62b0f..bc6c1e2206cf 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -336,9 +336,6 @@ int perf_event__process(struct perf_tool *tool, struct machine *machine); struct addr_location; - -void addr_location__put(struct addr_location *al); - struct thread; bool is_bts_event(struct perf_event_attr *attr); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0b893dcc8ea6..e297de14184c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -132,6 +132,8 @@ struct addr_location { s32 socket; }; +void addr_location__put(struct addr_location *al); + int dso__load(struct dso *dso, struct map *map); int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated); -- cgit v1.2.3 From cde5671268faf6419026ebd5e0f1783b3a84cf39 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 16:54:46 -0300 Subject: perf thread: Move thread__resolve() from event.h Its a thread method, so move it to thread.h, this way some places that were using event.h just to get this prototype may stop doing so and speed up building and disentanble the header dependency graph. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 5 ----- tools/perf/util/thread.h | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index bc6c1e2206cf..6663a676eadc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -335,13 +335,8 @@ int perf_event__process(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine); -struct addr_location; -struct thread; - bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); -void thread__resolve(struct thread *thread, struct addr_location *al, - struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 241f300d7d6e..395c626699a9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -158,4 +158,7 @@ static inline bool thread__is_filtered(struct thread *thread) void thread__free_stitch_list(struct thread *thread); +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample); + #endif /* __PERF_THREAD_H */ -- cgit v1.2.3 From fd8d5a3b076c033f5589186ac49d76e74b39f97f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 17:02:57 -0300 Subject: perf tests: Add missing event.h include It uses things like perf_event__name() but were not including event.h, where its prototype lives, fix it. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/perf-record.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 6377906c1318..e68ca6229756 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -5,6 +5,7 @@ #include #include "debug.h" +#include "event.h" #include "evlist.h" #include "evsel.h" #include "thread_map.h" diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index d82539e2ae64..1c4feec1adff 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -5,6 +5,7 @@ #include #include +#include "event.h" #include "evlist.h" #include "evsel.h" #include "debug.h" -- cgit v1.2.3 From 606f70ab7f5ace535514a5fa6f9ad62ae6e515eb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 17:42:52 -0300 Subject: perf mmap: Remove several unneeded includes from util/mmap.h Those headers are not needed in util/mmap.h, remove them. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index cd4ccec7f361..f944c3cd5efa 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -2,18 +2,13 @@ #define __PERF_MMAP_H 1 #include -#include -#include #include -#include #include #include -#include #ifdef HAVE_AIO_SUPPORT #include #endif #include "auxtrace.h" -#include "event.h" #include "util/compress.h" struct aiocb; -- cgit v1.2.3 From 2e5a738abc69601a4ea4a3544ec29cab9189eaae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Oct 2022 17:44:07 -0300 Subject: perf evlist: Add missing util/event.h header Needed to get the event_attr_init() and perf_event_paranoid() prototypes that were being obtained indirectly, by sheer luck. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 112850d629cb..fbf3192bced9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -24,6 +24,7 @@ #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" +#include "util/event.h" #include "util/string2.h" #include "util/perf_api_probe.h" #include "util/evsel_fprintf.h" -- cgit v1.2.3 From 146edff3d7ed135269dae8abef3219083c45b21e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 12 Oct 2022 18:12:05 -0700 Subject: perf test: Parse events workaround for dash/minus Skip an event configuration for event names with a dash/minus in them. Events with a dash/minus in their name cause parsing issues as legacy encoding of events would use a dash/minus as a separator. The parser separates events with dashes into prefixes and suffixes and then recombines them. Unfortunately if an event has part of its name that matches a legacy token then the recombining fails. This is seen for branch-brs where branch is a legacy token. branch-brs was introduced to sysfs in: https://lore.kernel.org/all/20220322221517.2510440-5-eranian@google.com/ The failure is shown below as well as the workaround to use a config where the dash/minus isn't treated specially: ``` $ perf stat -e branch-brs true event syntax error: 'branch-brs' \___ parser error $ perf stat -e cpu/branch-brs/ true Performance counter stats for 'true': 46,179 cpu/branch-brs/ ``` Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20221013011205.3151391-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 459afdb256a1..3440dd2616b0 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2237,6 +2237,19 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest pr_debug("Test PMU event failed for '%s'", name); ret = combine_test_results(ret, test_ret); } + /* + * Names containing '-' are recognized as prefixes and suffixes + * due to '-' being a legacy PMU separator. This fails when the + * prefix or suffix collides with an existing legacy token. For + * example, branch-brs has a prefix (branch) that collides with + * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix + * isn't expected after this. As event names in the config + * slashes are allowed a '-' in the name we check this works + * above. + */ + if (strchr(ent->d_name, '-')) + continue; + snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); e.name = name; e.check = test__checkevent_pmu_events_mix; -- cgit v1.2.3 From 005ef2b41b119caf65ac611109ce7ae68d857b46 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:25 -0700 Subject: perf lock contention: Fix memory sanitizer issue The msan reported a use-of-uninitialized-value warning for the struct lock_contention_data in lock_contention_read(). While it'd be filled by bpf_map_lookup_elem(), let's just initialize it to silence the warning. ==12524==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x562b0f16b1cd in lock_contention_read util/bpf_lock_contention.c:139:7 #1 0x562b0ef65ec6 in __cmd_contention builtin-lock.c:1737:3 #2 0x562b0ef65ec6 in cmd_lock builtin-lock.c:1992:8 #3 0x562b0ee7f50b in run_builtin perf.c:322:11 #4 0x562b0ee7efc1 in handle_internal_command perf.c:376:8 #5 0x562b0ee7e1e9 in run_argv perf.c:420:2 #6 0x562b0ee7e1e9 in main perf.c:550:3 #7 0x7f065f10e632 in __libc_start_main (/usr/lib64/libc.so.6+0x61632) #8 0x562b0edf2fa9 in _start (perf+0xfa9) SUMMARY: MemorySanitizer: use-of-uninitialized-value (perf+0xe15160) in lock_contention_read Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221028180128.3311491-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index fc4d613cb979..06466da792e4 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -110,7 +110,7 @@ int lock_contention_read(struct lock_contention *con) { int fd, stack; s32 prev_key, key; - struct lock_contention_data data; + struct lock_contention_data data = {}; struct lock_stat *st; struct machine *machine = con->machine; u64 stack_trace[con->max_stack]; -- cgit v1.2.3 From 0a277b6226703812ba90543d5bb49476e03f816e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:26 -0700 Subject: perf lock contention: Check --max-stack option The --max-stack option is used to allocate the BPF stack map and stack trace array in the userspace. Check the value properly before using. Practically it cannot be greater than the sysctl_perf_event_max_stack. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221028180128.3311491-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 66520712a167..6f79175365a8 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -24,6 +24,7 @@ #include "util/data.h" #include "util/string2.h" #include "util/map.h" +#include "util/util.h" #include #include @@ -1858,6 +1859,29 @@ static int parse_map_entry(const struct option *opt, const char *str, return 0; } +static int parse_max_stack(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + unsigned long *len = (unsigned long *)opt->value; + long val; + char *endptr; + + errno = 0; + val = strtol(str, &endptr, 0); + if (*endptr != '\0' || errno != 0) { + pr_err("invalid max stack depth: %s\n", str); + return -1; + } + + if (val < 0 || val > sysctl__max_stack()) { + pr_err("invalid max stack depth: %ld\n", val); + return -1; + } + + *len = val; + return 0; +} + int cmd_lock(int argc, const char **argv) { const struct option lock_options[] = { @@ -1913,9 +1937,9 @@ int cmd_lock(int argc, const char **argv) "Trace on existing thread id (exclusive to --pid)"), OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num", "Max number of BPF map entries", parse_map_entry), - OPT_INTEGER(0, "max-stack", &max_stack_depth, - "Set the maximum stack depth when collecting lock contention, " - "Default: " __stringify(CONTENTION_STACK_DEPTH)), + OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", + "Set the maximum stack depth when collecting lopck contention, " + "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), OPT_INTEGER(0, "stack-skip", &stack_skip, "Set the number of stack depth to skip when finding a lock caller, " "Default: " __stringify(CONTENTION_STACK_SKIP)), -- cgit v1.2.3 From 9e9c5f3cf912c3e068b6c24d77f6f07ac242395b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:27 -0700 Subject: perf lock contention: Avoid variable length arrays The msan also warns about the use of VLA for stack_trace variable. We can dynamically allocate instead. While at it, simplify the error handle a bit (and fix bugs). Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221028180128.3311491-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 41 ++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 06466da792e4..0deec1178778 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -108,28 +108,36 @@ int lock_contention_stop(void) int lock_contention_read(struct lock_contention *con) { - int fd, stack; + int fd, stack, err = 0; s32 prev_key, key; struct lock_contention_data data = {}; - struct lock_stat *st; + struct lock_stat *st = NULL; struct machine *machine = con->machine; - u64 stack_trace[con->max_stack]; + u64 *stack_trace; + size_t stack_size = con->max_stack * sizeof(*stack_trace); fd = bpf_map__fd(skel->maps.lock_stat); stack = bpf_map__fd(skel->maps.stacks); con->lost = skel->bss->lost; + stack_trace = zalloc(stack_size); + if (stack_trace == NULL) + return -1; + prev_key = 0; while (!bpf_map_get_next_key(fd, &prev_key, &key)) { struct map *kmap; struct symbol *sym; int idx = 0; + /* to handle errors in the loop body */ + err = -1; + bpf_map_lookup_elem(fd, &key, &data); st = zalloc(sizeof(*st)); if (st == NULL) - return -1; + break; st->nr_contended = data.count; st->wait_time_total = data.total_time; @@ -163,25 +171,32 @@ int lock_contention_read(struct lock_contention *con) st->name = strdup(sym->name); if (ret < 0 || st->name == NULL) - return -1; + break; } else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) { - free(st); - return -1; + break; } if (verbose) { - st->callstack = memdup(stack_trace, sizeof(stack_trace)); - if (st->callstack == NULL) { - free(st); - return -1; - } + st->callstack = memdup(stack_trace, stack_size); + if (st->callstack == NULL) + break; } hlist_add_head(&st->hash_entry, con->result); prev_key = key; + + /* we're fine now, reset the values */ + st = NULL; + err = 0; } - return 0; + free(stack_trace); + if (st) { + free(st->name); + free(st); + } + + return err; } int lock_contention_finish(void) -- cgit v1.2.3 From c940fa276b5a4210255e0cb9766f06e38443303a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 28 Oct 2022 11:01:28 -0700 Subject: perf lock contention: Increase default stack skip to 4 In most configurations, it works well with skipping 4 entries by default. If some systems still have 3 BPF internal stack frames, the next frame should be in a lock function which will be skipped later when it tries to find a caller. So increasing to 4 won't affect such systems too. With --stack-skip=0, I can see something like this: 24 49.84 us 7.41 us 2.08 us mutex bpf_prog_e1b85959d520446c_contention_begin+0x12e 0xffffffffc045040e bpf_prog_e1b85959d520446c_contention_begin+0x12e 0xffffffffc045040e bpf_prog_e1b85959d520446c_contention_begin+0x12e 0xffffffff82ea2071 bpf_trace_run2+0x51 0xffffffff82de775b __bpf_trace_contention_begin+0xb 0xffffffff82c02045 __mutex_lock+0x245 0xffffffff82c019e3 __mutex_lock_slowpath+0x13 0xffffffff82c019c0 mutex_lock+0x20 0xffffffff830a083c kernfs_iop_permission+0x2c Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221028180128.3311491-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/lock-contention.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index b8cb8830b7bc..e3c061b1795b 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -91,7 +91,7 @@ struct thread_stat { * Number of stack trace entries to skip when finding callers. * The first few entries belong to the locking implementation itself. */ -#define CONTENTION_STACK_SKIP 3 +#define CONTENTION_STACK_SKIP 4 /* * flags for lock:contention_begin -- cgit v1.2.3 From 98e4c68ddcaf3721df9bef809775a8e3562cb6f9 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:30 +0900 Subject: perf probe: Fix to avoid crashing if DW_AT_decl_file is NULL Since clang generates DWARF5 which sets DW_AT_decl_file as 0, dwarf_decl_file() thinks that is invalid and returns NULL. In that case 'perf probe' SIGSEGVs because it doesn't expect a NULL decl_file. This adds a dwarf_decl_file() return value check to avoid such SEGV with clang generated DWARF5 info. Without this, 'perf probe' crashes: $ perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 Segmentation fault $ With this, it just warns about it: $ perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 Debuginfo analysis failed. Error: Failed to show lines. $ Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731051077.2100653.15626653369345128302.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 7 ++++++- tools/perf/util/probe-finder.c | 29 +++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 609ca1671501..406b7bdc851a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -137,7 +137,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, } out: - return *lineno ?: -ENOENT; + return (*lineno && *fname) ? *lineno : -ENOENT; } static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data); @@ -874,6 +874,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); dwarf_decl_line(rt_die, &decl); decf = dwarf_decl_file(rt_die); + if (!decf) { + pr_debug2("Failed to get the declared file name of %s\n", + dwarf_diename(rt_die)); + return -EINVAL; + } } else cu_die = rt_die; if (!cu_die) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 50d861a80f57..1aa8fcc41c76 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1063,6 +1063,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct probe_finder *pf = param->data; struct perf_probe_point *pp = &pf->pev->point; + const char *fname; /* Check tag and diename */ if (!die_is_func_def(sp_die) || @@ -1070,12 +1071,17 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) return DWARF_CB_OK; /* Check declared file */ - if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die))) + fname = dwarf_decl_file(sp_die); + if (!fname) { + pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n"); + return DWARF_CB_OK; + } + if (pp->file && fname && strtailcmp(pp->file, fname)) return DWARF_CB_OK; pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), (unsigned long)dwarf_dieoffset(sp_die)); - pf->fname = dwarf_decl_file(sp_die); + pf->fname = fname; if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; @@ -1134,6 +1140,7 @@ struct pubname_callback_param { static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) { struct pubname_callback_param *param = data; + const char *fname; if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) { if (dwarf_tag(param->sp_die) != DW_TAG_subprogram) @@ -1143,9 +1150,11 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die)) return DWARF_CB_OK; - if (param->file && - strtailcmp(param->file, dwarf_decl_file(param->sp_die))) - return DWARF_CB_OK; + if (param->file) { + fname = dwarf_decl_file(param->sp_die); + if (!fname || strtailcmp(param->file, fname)) + return DWARF_CB_OK; + } param->found = 1; return DWARF_CB_ABORT; @@ -1779,7 +1788,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, } /* Verify the lineno and baseline are in a same file */ tmp = dwarf_decl_file(&spdie); - if (!tmp || strcmp(tmp, fname) != 0) + if (!tmp || (fname && strcmp(tmp, fname) != 0)) lineno = 0; } @@ -1889,10 +1898,14 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct line_finder *lf = param->data; struct line_range *lr = lf->lr; + const char *fname; /* Check declared file */ - if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) - return DWARF_CB_OK; + if (lr->file) { + fname = dwarf_decl_file(sp_die); + if (!fname || strtailcmp(lr->file, fname)) + return DWARF_CB_OK; + } if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { lf->fname = dwarf_decl_file(sp_die); -- cgit v1.2.3 From f828929ab7f0dc3353e4a617f94f297fa8f3dec3 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:39 +0900 Subject: perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute acccessor functions, so that it can find the specified attribute from abstact origin DIE etc. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 406b7bdc851a..216fc3d959e8 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formudata(&attr, result) != 0) return -ENOENT; @@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formsdata(&attr, result) != 0) return -ENOENT; -- cgit v1.2.3 From dc9a5d2ccd5c823cc05cafe75fcf19b682d8152c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:49 +0900 Subject: perf probe: Fix to get declared file name from clang DWARF5 Fix to get the declared file name even if it uses file index 0 in DWARF5, using custom die_get_decl_file() function. Actually, the DWARF5 standard says file index 0 of the DW_AT_decl_file is invalid(1), but there is a discussion and maybe this will be updated [2]. Anyway, clang generates such DWARF5 file for the linux kernel. Thus it must be handled. Without this, 'perf probe' returns an error: $ ./perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 Debuginfo analysis failed. Error: Failed to show lines. With this, it can handle the case correctly: $ ./perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10 11 ret = rw_verify_area(READ, file, pos, count); 12 if (ret) return ret; [1] DWARF5 specification 2.14 says "The value 0 indicates that no source file has been specified.") [2] http://wiki.dwarfstd.org/index.php?title=DWARF5_Line_Table_File_Numbers) Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731052936.2100653.13380621874859467731.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 47 ++++++++++++++++++++++++++++-------------- tools/perf/util/dwarf-aux.h | 3 +++ tools/perf/util/probe-finder.c | 14 ++++++------- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 216fc3d959e8..30b36b525681 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -123,7 +123,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, if (die_find_realfunc(cu_die, addr, &die_mem) && die_entrypc(&die_mem, &faddr) == 0 && faddr == addr) { - *fname = dwarf_decl_file(&die_mem); + *fname = die_get_decl_file(&die_mem); dwarf_decl_line(&die_mem, lineno); goto out; } @@ -486,6 +486,19 @@ static int die_get_decl_fileno(Dwarf_Die *pdie) return -ENOENT; } +/* Return the file name by index */ +static const char *die_get_file_name(Dwarf_Die *dw_die, int idx) +{ + Dwarf_Die cu_die; + Dwarf_Files *files; + + if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) || + dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) + return NULL; + + return dwarf_filesrc(files, idx, NULL, NULL); +} + /** * die_get_call_file - Get callsite file name of inlined function instance * @in_die: a DIE of an inlined function instance @@ -495,18 +508,22 @@ static int die_get_decl_fileno(Dwarf_Die *pdie) */ const char *die_get_call_file(Dwarf_Die *in_die) { - Dwarf_Die cu_die; - Dwarf_Files *files; - int idx; - - idx = die_get_call_fileno(in_die); - if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) || - dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) - return NULL; - - return dwarf_filesrc(files, idx, NULL, NULL); + return die_get_file_name(in_die, die_get_call_fileno(in_die)); } +/** + * die_get_decl_file - Find the declared file name of this DIE + * @dw_die: a DIE for something declared. + * + * Get declared file name of @dw_die. + * NOTE: Since some version of clang DWARF5 implementation incorrectly uses + * file index 0 for DW_AT_decl_file, die_get_decl_file() will return NULL for + * such cases. Use this function instead. + */ +const char *die_get_decl_file(Dwarf_Die *dw_die) +{ + return die_get_file_name(dw_die, die_get_decl_fileno(dw_die)); +} /** * die_find_child - Generic DIE search function in DIE tree @@ -790,7 +807,7 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) } if (addr) { - fname = dwarf_decl_file(in_die); + fname = die_get_decl_file(in_die); if (fname && dwarf_decl_line(in_die, &lineno) == 0) { lw->retval = lw->callback(fname, lineno, addr, lw->data); if (lw->retval != 0) @@ -818,7 +835,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, int lineno; /* Handle function declaration line */ - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && die_entrypc(sp_die, &addr) == 0) { lw.retval = callback(fname, lineno, addr, data); @@ -873,7 +890,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); dwarf_decl_line(rt_die, &decl); - decf = dwarf_decl_file(rt_die); + decf = die_get_decl_file(rt_die); if (!decf) { pr_debug2("Failed to get the declared file name of %s\n", dwarf_diename(rt_die)); @@ -928,7 +945,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) dwarf_decl_line(&die_mem, &inl); if (inl != decl || - decf != dwarf_decl_file(&die_mem)) + decf != die_get_decl_file(&die_mem)) continue; } } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ee0fa19b5c4..7ec8bc1083bb 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -50,6 +50,9 @@ int die_get_call_lineno(Dwarf_Die *in_die); /* Get callsite file name of inlined function instance */ const char *die_get_call_file(Dwarf_Die *in_die); +/* Get declared file name of a DIE */ +const char *die_get_decl_file(Dwarf_Die *dw_die); + /* Get type die */ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1aa8fcc41c76..54b49ce85c9f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -763,7 +763,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) /* Skip if declared file name does not match */ if (fsp->file) { - file = dwarf_decl_file(fn_die); + file = die_get_decl_file(fn_die); if (!file || strcmp(fsp->file, file) != 0) return 0; } @@ -1071,7 +1071,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) return DWARF_CB_OK; /* Check declared file */ - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (!fname) { pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n"); return DWARF_CB_OK; @@ -1151,7 +1151,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) return DWARF_CB_OK; if (param->file) { - fname = dwarf_decl_file(param->sp_die); + fname = die_get_decl_file(param->sp_die); if (!fname || strtailcmp(param->file, fname)) return DWARF_CB_OK; } @@ -1750,7 +1750,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, goto post; } - fname = dwarf_decl_file(&spdie); + fname = die_get_decl_file(&spdie); if (addr == baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; @@ -1787,7 +1787,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, } } /* Verify the lineno and baseline are in a same file */ - tmp = dwarf_decl_file(&spdie); + tmp = die_get_decl_file(&spdie); if (!tmp || (fname && strcmp(tmp, fname) != 0)) lineno = 0; } @@ -1902,13 +1902,13 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) /* Check declared file */ if (lr->file) { - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (!fname || strtailcmp(lr->file, fname)) return DWARF_CB_OK; } if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { - lf->fname = dwarf_decl_file(sp_die); + lf->fname = die_get_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); lf->lno_s = lr->offset + lr->start; -- cgit v1.2.3 From f3c9bd4e16a503cb14891963428d388a4f70ffb8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:06 -0700 Subject: perf build: Update to C standard to gnu11 C11 has become the standard for mainstream kernel development [1], allowing it in the perf build enables libraries like stdatomic.h to be assumed to be present. This came up in the context of [2]. [1] https://lore.kernel.org/lkml/CAHk-=whWbENRz-vLY6vpESDLj6kGUTKO3khGtVfipHqwewh2HQ@mail.gmail.com/ [2] https://lore.kernel.org/lkml/20221024011024.462518-1-irogers@google.com/ Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 898226ea8cad..d3d3c13a9f25 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -307,7 +307,7 @@ CORE_CFLAGS += -ggdb3 CORE_CFLAGS += -funwind-tables CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra -CORE_CFLAGS += -std=gnu99 +CORE_CFLAGS += -std=gnu11 CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall -- cgit v1.2.3 From 8ed28c2b56b78442989ef1afee2b968e9d51a65c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:07 -0700 Subject: perf record: Use sig_atomic_t for signal handlers This removes undefined behavior as described in: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Suggested-by: Leo Yan Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 59f3d98a0196..bd462a3f2bbd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -646,10 +646,10 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) return record__write(rec, map, bf, size); } -static volatile int signr = -1; -static volatile int child_finished; +static volatile sig_atomic_t signr = -1; +static volatile sig_atomic_t child_finished; #ifdef HAVE_EVENTFD_SUPPORT -static volatile int done_fd = -1; +static volatile sig_atomic_t done_fd = -1; #endif static void sig_handler(int sig) @@ -1926,7 +1926,7 @@ static void record__read_lost_samples(struct record *rec) } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 -- cgit v1.2.3 From 7f3374299f9762ba7946138bf0d5cfbd50da111b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:08 -0700 Subject: perf daemon: Use sig_atomic_t to avoid UB Use sig_atomic_t for a variable written to in a signal handler and read elsewhere. This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 3ce0c960ccc9..7036ec92d47d 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -105,7 +105,7 @@ static const char * const daemon_usage[] = { NULL }; -static bool done; +static volatile sig_atomic_t done; static void sig_handler(int sig __maybe_unused) { -- cgit v1.2.3 From 853596fb71f7c2f7ff0de7b13f09d6598d4d56cd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:09 -0700 Subject: perf ftrace: Use sig_atomic_t to avoid UB Use sig_atomic_t for a variable written to in a signal handler and read elsewhere. This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 7de07bb16d23..d7fe00f66b83 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -36,8 +36,8 @@ #define DEFAULT_TRACER "function_graph" -static volatile int workload_exec_errno; -static bool done; +static volatile sig_atomic_t workload_exec_errno; +static volatile sig_atomic_t done; static void sig_handler(int sig __maybe_unused) { -- cgit v1.2.3 From 057929f9d083e80c9b30c324add69d2054ca6d82 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:10 -0700 Subject: perf session: Change type to avoid undefined behaviour in a signal handler The 'session_done' variable is written to inside the signal handler of 'perf report' and 'perf script'. Switch its type to avoid undefined behavior. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1a4f10de29ff..0e1a3d6bacb9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -2022,7 +2023,7 @@ static int perf_session__flush_thread_stacks(struct perf_session *session) NULL); } -volatile int session_done; +volatile sig_atomic_t session_done; static int __perf_session__process_decomp_events(struct perf_session *session); -- cgit v1.2.3 From 01513fdc18f395dbcc924bc5e9962b12f86f947a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:11 -0700 Subject: perf stat: Use sig_atomic_t to avoid undefined behaviour in a signal handler Use sig_atomic_t for variables written/accessed in signal handlers. This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e52601a54b26..d5e1670bca20 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -173,7 +173,7 @@ static struct target target = { #define METRIC_ONLY_LEN 20 -static volatile pid_t child_pid = -1; +static volatile sig_atomic_t child_pid = -1; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; @@ -208,7 +208,7 @@ struct perf_stat { static struct perf_stat perf_stat; #define STAT_RECORD perf_stat.record -static volatile int done = 0; +static volatile sig_atomic_t done = 0; static struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, @@ -580,7 +580,7 @@ static void disable_counters(void) } } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 @@ -1039,7 +1039,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } -static volatile int signr = -1; +static volatile sig_atomic_t signr = -1; static void skip_signal(int signo) { -- cgit v1.2.3 From 691768968f2a13eba8d52e8475dca7feb288d4f2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:12 -0700 Subject: perf top: Use sig_atomic_t to avoid undefined behaviour in a signal handler Use sig_atomic_t for variables written/accessed in signal handlers. This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4b3ff7687236..bb5bd241246b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -87,8 +87,8 @@ #include #include -static volatile int done; -static volatile int resize; +static volatile sig_atomic_t done; +static volatile sig_atomic_t resize; #define HEADER_LINE_NR 5 -- cgit v1.2.3 From 92ea0720ba9cf7f09589a711245c2da145125958 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Oct 2022 11:19:13 -0700 Subject: perf trace: Use sig_atomic_t to avoid undefined behaviour in a signal handler Use sig_atomic_t for variables written/accessed in signal handlers. This is undefined behavior as per: https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221024181913.630986-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d3c757769b96..72991528687e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1535,8 +1535,8 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) } static pid_t workload_pid = -1; -static bool done = false; -static bool interrupted = false; +static volatile sig_atomic_t done = false; +static volatile sig_atomic_t interrupted = false; static void sighandler_interrupt(int sig __maybe_unused) { -- cgit v1.2.3 From 14e4b9f4289aed2c8d4858cd750748041b6c434f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:31 -0700 Subject: perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility Don't use deprecated and now broken map style. Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Committer notes: Add /usr/include to the include path so that bpf/bpf_helpers.h can be found, remove sys/socket.h, adding the sockaddr_storage definition, also remove stdbool.h, both were preventing building the augmented_raw_syscalls.c file with clang, revisit later. Testing it: Asking for syscalls that have string arguments: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,string --max-events 10 0.000 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj", flags: RDONLY) = 13 0.158 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj", flags: RDONLY) = 13 0.215 thermald/1144 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone3/temp", flags: RDONLY) = 13 16.448 cgroupify/36478 openat(dfd: 4, filename: ".", flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 5 16.468 cgroupify/36478 newfstatat(dfd: 5, filename: "", statbuf: 0x7fffca5b4130, flag: 4096) = 0 16.473 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 16.499 systemd-oomd/972 newfstatat(dfd: 12, filename: "", statbuf: 0x7ffd2bc73cc0, flag: 4096) = 0 16.516 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 16.538 abrt-dump-jour/1370 newfstatat(dfd: 21, filename: "", statbuf: 0x7ffc651b8980, flag: 4096) = 0 16.540 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 # Networking syscalls: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,sendto*,connect* --max-events 10 0.000 isc-net-0005/1206 connect(fd: 512, uservaddr: { .family: INET, port: 53, addr: 23.211.132.65 }, addrlen: 16) = 0 0.070 isc-net-0002/1203 connect(fd: 515, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.031 isc-net-0006/1207 connect(fd: 513, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.079 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a40611b0, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106 0.180 isc-net-0006/1207 connect(fd: 519, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.211 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a4061230, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106 0.298 isc-net-0006/1207 connect(fd: 515, uservaddr: { .family: INET, port: 53, addr: 96.7.49.67 }, addrlen: 16) = 0 0.109 isc-net-0004/1205 connect(fd: 518, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 0.164 isc-net-0002/1203 sendto(fd: 3, buff: 0x7f73ac064300, len: 107, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 107 0.247 isc-net-0002/1203 connect(fd: 522, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) # Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +- tools/perf/examples/bpf/augmented_raw_syscalls.c | 103 +++++++++++++++++++---- tools/perf/util/llvm-utils.c | 2 +- 3 files changed, 89 insertions(+), 19 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d3d3c13a9f25..59420676dee8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1239,7 +1239,8 @@ includedir = $(abspath $(prefix)/$(includedir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core -perf_include_dir = lib/perf/include +# FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance +perf_include_dir = /usr/include perf_examples_dir = lib/perf/examples sharedir = $(prefix)/share template_dir = share/perf-core/templates diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index a262dcd020f4..926238efd7d8 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -14,13 +14,21 @@ * code that will combine entry/exit in a strace like way. */ -#include +#include +#include #include -#include -#include + +// FIXME: These should come from system headers +typedef char bool; +typedef int pid_t; /* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +struct __augmented_syscalls__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __augmented_syscalls__ SEC(".maps"); /* * string_args_len: one per syscall arg, 0 means not a string or don't copy it, @@ -29,24 +37,39 @@ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); */ struct syscall { bool enabled; - u16 string_args_len[6]; + __u16 string_args_len[6]; }; -bpf_map(syscalls, ARRAY, int, struct syscall, 512); +struct syscalls { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct syscall); + __uint(max_entries, 512); +} syscalls SEC(".maps"); /* * What to augment at entry? * * Pointer arg payloads (filenames, etc) passed from userspace to the kernel */ -bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_enter { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_enter SEC(".maps"); /* * What to augment at exit? * * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. */ -bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_exit { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_exit SEC(".maps"); struct syscall_enter_args { unsigned long long common_tp_fields; @@ -66,7 +89,38 @@ struct augmented_arg { char value[PATH_MAX]; }; -pid_filter(pids_filtered); +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define SS_MAXSIZE 128 /* Implementation specific max size */ + +typedef unsigned short sa_family_t; + +/* + * FIXME: Should come from system headers + * + * The definition uses anonymous union and struct in order to control the + * default alignment. + */ +struct sockaddr_storage { + union { + struct { + sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; struct augmented_args_payload { struct syscall_enter_args args; @@ -79,7 +133,12 @@ struct augmented_args_payload { }; // We need more tmp space than the BPF stack can give us -bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1); +struct augmented_args_tmp { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct augmented_args_payload); + __uint(max_entries, 1); +} augmented_args_tmp SEC(".maps"); static inline struct augmented_args_payload *augmented_args_payload(void) { @@ -90,14 +149,14 @@ static inline struct augmented_args_payload *augmented_args_payload(void) static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) { /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); } static inline unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) { unsigned int augmented_len = sizeof(*augmented_arg); - int string_len = probe_read_str(&augmented_arg->value, arg_len, arg); + int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); augmented_arg->size = augmented_arg->err = 0; /* @@ -146,7 +205,7 @@ int sys_enter_connect(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -165,7 +224,7 @@ int sys_enter_sendto(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -234,6 +293,16 @@ int sys_enter_renameat(struct syscall_enter_args *args) return augmented__output(args, augmented_args, len); } +static pid_t getpid(void) +{ + return bpf_get_current_pid_tgid(); +} + +static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) +{ + return bpf_map_lookup_elem(pids, &pid) != NULL; +} + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { @@ -257,7 +326,7 @@ int sys_enter(struct syscall_enter_args *args) if (augmented_args == NULL) return 1; - probe_read(&augmented_args->args, sizeof(augmented_args->args), args); + bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); /* * Jump to syscall specific augmenter, even if the default one, @@ -278,7 +347,7 @@ int sys_exit(struct syscall_exit_args *args) if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&exit_args, sizeof(exit_args), args); + bpf_probe_read(&exit_args, sizeof(exit_args), args); /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the @@ -291,4 +360,4 @@ int sys_exit(struct syscall_exit_args *args) return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 2dc797007419..a5cac85783d8 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0) + if (asprintf(&perf_bpf_include_opts, "-I%s/", perf_include_dir) < 0) goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", linux_version_code_str); -- cgit v1.2.3 From 514607e3c0f0e381aa4f6fe866b70b1fa9bfae74 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:34 -0700 Subject: perf trace: hello fix libbpf 1.0+ compatibility Don't use deprecated and now broken map style. Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Switch to raw_syscalls:sys_enter to avoid the evlist being empty and fixing generating output. Committer testing: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/hello.c --call-graph=dwarf --max-events 5 0.000 perf/206852 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___sched_setaffinity_new (/usr/lib64/libc.so.6) 8.561 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __libc_read (/usr/lib64/libc.so.6) 8.571 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___ioctl (/usr/lib64/libc.so.6) 8.586 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___write (/usr/lib64/libc.so.6) 8.592 pipewire/2290 __bpf_stdout__(Hello, world) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) syscall_trace_enter.constprop.0 ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __timerfd_settime (/usr/lib64/libc.so.6) # Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/hello.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c index cf3c2fdc7f79..e9080b0df158 100644 --- a/tools/perf/examples/bpf/hello.c +++ b/tools/perf/examples/bpf/hello.c @@ -1,9 +1,27 @@ -#include +// SPDX-License-Identifier: GPL-2.0 +#include +#include -int syscall_enter(openat)(void *args) +struct __bpf_stdout__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __bpf_stdout__ SEC(".maps"); + +#define puts(from) \ + ({ const int __len = sizeof(from); \ + char __from[sizeof(from)] = from; \ + bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ + &__from, __len & (sizeof(from) - 1)); }) + +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) { puts("Hello, world\n"); return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From baddab891a21e5870723d182020fec445b0874b3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:35 -0700 Subject: perf trace: empty fix libbpf 1.0+ compatibility Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Add raw_syscalls:sys_enter to avoid the evlist being empty. Committer testing: # time perf trace -e ~acme/git/perf/tools/perf/examples/bpf/empty.c sleep 5 real 0m5.697s user 0m0.217s sys 0m0.453s # I.e. it sets up everything successfully (use -v to see the details) and filters out all syscalls, then exits when the workload (sleep 5) finishes. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/empty.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c index 7d7fb0c9fe76..3e296c0c53d7 100644 --- a/tools/perf/examples/bpf/empty.c +++ b/tools/perf/examples/bpf/empty.c @@ -1,3 +1,12 @@ -#include +// SPDX-License-Identifier: GPL-2.0 +#include +#include -license(GPL); +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 71811e8c77e974a0ab978c86a2b32b3f2c82f455 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:36 -0700 Subject: perf trace: 5sec fix libbpf 1.0+ compatibility Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF headers. Committer testing: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c sleep 5 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000) # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c/max-stack=7/ sleep 5 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000) hrtimer_nanosleep ([kernel.kallsyms]) common_nsleep ([kernel.kallsyms]) __x64_sys_clock_nanosleep ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __GI___clock_nanosleep (/usr/lib64/libc.so.6) [0] ([unknown]) # Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/5sec.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index e6b6181c6dc6..3bd7fc17631f 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,13 +39,15 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo */ -#include +#include +#include #define NSEC_PER_SEC 1000000000L -int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec) +SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp") +int hrtimer_nanosleep(void *ctx, int err, long long sec) { return sec / NSEC_PER_SEC == 5ULL; } -license(GPL); +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From cfddf0d4a5571bcc94760c27729a60daefda38bb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 2 Nov 2022 21:54:37 -0700 Subject: perf bpf: Remove now unused BPF headers Example code has migrated to use standard BPF header files, remove unnecessary perf equivalents. Update install step to not try to copy these. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20221103045437.163510-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 5 --- tools/perf/include/bpf/bpf.h | 70 ----------------------------------- tools/perf/include/bpf/linux/socket.h | 24 ------------ tools/perf/include/bpf/pid_filter.h | 21 ----------- tools/perf/include/bpf/stdio.h | 16 -------- tools/perf/include/bpf/unistd.h | 10 ----- 6 files changed, 146 deletions(-) delete mode 100644 tools/perf/include/bpf/bpf.h delete mode 100644 tools/perf/include/bpf/linux/socket.h delete mode 100644 tools/perf/include/bpf/pid_filter.h delete mode 100644 tools/perf/include/bpf/stdio.h delete mode 100644 tools/perf/include/bpf/unistd.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a432e59afc42..67819f905611 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -960,11 +960,6 @@ endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBBPF - $(call QUIET_INSTALL, bpf-headers) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \ - $(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux' $(call QUIET_INSTALL, bpf-examples) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h deleted file mode 100644 index b422aeef5339..000000000000 --- a/tools/perf/include/bpf/bpf.h +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef _PERF_BPF_H -#define _PERF_BPF_H - -#include - -/* - * A helper structure used by eBPF C program to describe map attributes to - * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h: - */ -struct bpf_map { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -#define bpf_map(name, _type, type_key, type_val, _max_entries) \ -struct bpf_map SEC("maps") name = { \ - .type = BPF_MAP_TYPE_##_type, \ - .key_size = sizeof(type_key), \ - .value_size = sizeof(type_val), \ - .max_entries = _max_entries, \ -}; \ -struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ -}; \ -struct ____btf_map_##name __attribute__((section(".maps." #name), used)) \ - ____btf_map_##name = { } - -/* - * FIXME: this should receive .max_entries as a parameter, as careful - * tuning of these limits is needed to avoid hitting limits that - * prevents other BPF constructs, such as tracepoint handlers, - * to get installed, with cryptic messages from libbpf, etc. - * For the current need, 'perf trace --filter-pids', 64 should - * be good enough, but this surely needs to be revisited. - */ -#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64) - -static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; -static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; - -static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *)BPF_FUNC_tail_call; - -#define SEC(NAME) __attribute__((section(NAME), used)) - -#define probe(function, vars) \ - SEC(#function "=" #function " " #vars) function - -#define syscall_enter(name) \ - SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name - -#define syscall_exit(name) \ - SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name - -#define license(name) \ -char _license[] SEC("license") = #name; \ -int _version SEC("version") = LINUX_VERSION_CODE; - -static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read; -static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str; - -static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; - -#endif /* _PERF_BPF_H */ diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h deleted file mode 100644 index 7f844568dab8..000000000000 --- a/tools/perf/include/bpf/linux/socket.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_SOCKET_H -#define _UAPI_LINUX_SOCKET_H - -/* - * Desired design of maximum size and alignment (see RFC2553) - */ -#define _K_SS_MAXSIZE 128 /* Implementation specific max size */ -#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) - /* Implementation specific desired alignment */ - -typedef unsigned short __kernel_sa_family_t; - -struct __kernel_sockaddr_storage { - __kernel_sa_family_t ss_family; /* address family */ - /* Following field(s) are implementation specific */ - char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; - /* space to achieve desired size, */ - /* _SS_MAXSIZE value minus size of ss_family */ -} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ - -#define sockaddr_storage __kernel_sockaddr_storage - -#endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h deleted file mode 100644 index 6e61c4bdf548..000000000000 --- a/tools/perf/include/bpf/pid_filter.h +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 - -#ifndef _PERF_BPF_PID_FILTER_ -#define _PERF_BPF_PID_FILTER_ - -#include - -#define pid_filter(name) pid_map(name, bool) - -static int pid_filter__add(struct bpf_map *pids, pid_t pid) -{ - bool value = true; - return bpf_map_update_elem(pids, &pid, &value, BPF_NOEXIST); -} - -static bool pid_filter__has(struct bpf_map *pids, pid_t pid) -{ - return bpf_map_lookup_elem(pids, &pid) != NULL; -} - -#endif // _PERF_BPF_PID_FILTER_ diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h deleted file mode 100644 index 316af5b2ff35..000000000000 --- a/tools/perf/include/bpf/stdio.h +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include - -struct bpf_map SEC("maps") __bpf_stdout__ = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = __NR_CPUS__, -}; - -#define puts(from) \ - ({ const int __len = sizeof(from); \ - char __from[__len] = from; \ - perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ - &__from, __len & (sizeof(from) - 1)); }) diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h deleted file mode 100644 index ca7877f9a976..000000000000 --- a/tools/perf/include/bpf/unistd.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 - -#include - -static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid; - -static pid_t getpid(void) -{ - return bpf_get_current_pid_tgid(); -} -- cgit v1.2.3 From 3cd65616f607cd3769647e2e0490e4048c0f289e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Nov 2022 11:15:51 -0300 Subject: perf examples bpf: Remove augmented_syscalls.c, the raw_syscalls one should be used instead The attempt at using BPF to copy syscall pointer arguments to show them like strace does started with sys_{enter,exit}_SYSCALL_NAME tracepoints, in tools/perf/examples/bpf/augmented_syscalls.c, but then achieving this result using raw_syscalls:{enter,exit} and BPF tail calls was deemed more flexible. The 'perf trace' codebase was adapted to using it while trying to continue supporting the old style per-syscall tracepoints, which at some point became too unwieldly and now isn't working properly. So lets scale back and concentrate on the augmented_raw_syscalls.c model on the way to using BPF skeletons. For the same reason remove the etcsnoop.c example, that used the old style per-tp syscalls just for the 'open' and 'openat' syscalls, looking at the pathnames starting with "/etc/", we should be able to do this later using filters, after we move to BPF skels. The augmented_raw_syscalls.c one continues to work, now with libbpf 1.0, after Ian work on using the libbpf map style: # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,open* --max-events 4 0.000 ping/194815 openat(dfd: CWD, filename: "/etc/hosts", flags: RDONLY|CLOEXEC) = 5 20.225 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 20.285 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 20.301 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21 # This is using this: # cat ~/.perfconfig [trace] show_zeros = yes show_duration = no no_inherit = yes args_alignment = 40 Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_syscalls.c | 169 --------------------------- tools/perf/examples/bpf/etcsnoop.c | 76 ------------ 2 files changed, 245 deletions(-) delete mode 100644 tools/perf/examples/bpf/augmented_syscalls.c delete mode 100644 tools/perf/examples/bpf/etcsnoop.c diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c deleted file mode 100644 index 524fdb8534b3..000000000000 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment syscalls with the contents of the pointer arguments. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them, that will - * check if perf_sample->raw_data is more than what is expected for each - * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the - * contents of pointer arguments. - */ - -#include -#include - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct syscall_exit_args { - unsigned long long common_tp_fields; - long syscall_nr; - long ret; -}; - -struct augmented_filename { - unsigned int size; - int reserved; - char value[256]; -}; - -#define augmented_filename_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - unsigned int len = sizeof(augmented_args); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ - len &= sizeof(augmented_args.filename.value) - 1; \ - } \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(open); - -struct syscall_enter_inotify_add_watch_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - char *filename_ptr; - long mask; -}; - -augmented_filename_syscall(inotify_add_watch); - -struct statbuf; - -struct syscall_enter_newstat_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - struct stat *statbuf; -}; - -augmented_filename_syscall(newstat); - -#ifndef _K_SS_MAXSIZE -#define _K_SS_MAXSIZE 128 -#endif - -#define augmented_sockaddr_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct sockaddr_storage addr; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args; \ - unsigned long addrlen = sizeof(augmented_args.addr); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ -/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ -/* if (addrlen > augmented_args.args.addrlen) */ \ -/* addrlen = augmented_args.args.addrlen; */ \ -/* */ \ - probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ -} - -struct sockaddr; - -struct syscall_enter_bind_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(bind); - -struct syscall_enter_connect_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(connect); - -struct syscall_enter_sendto_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - void *buff; - long len; - unsigned long flags; - struct sockaddr *addr_ptr; - long addr_len; -}; - -augmented_sockaddr_syscall(sendto); - -license(GPL); diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c deleted file mode 100644 index e81b535346c0..000000000000 --- a/tools/perf/examples/bpf/etcsnoop.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment the filename syscalls with the contents of the filename pointer argument - * filtering only those that do not start with /etc/. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them unmodified, - * which will be done as that feature is implemented in the next csets, for now - * it will appear in a dump done by the default tracepoint handler in 'perf trace', - * that uses bpf_output__fprintf() to just dump those contents, as done with - * the bpf-output event associated with the __bpf_output__ map declared in - * tools/perf/include/bpf/stdio.h. - */ - -#include - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct augmented_filename { - int size; - int reserved; - char value[64]; -}; - -#define augmented_filename_syscall_enter(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - char etc[6] = "/etc/"; \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ - return 0; \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(open); - -license(GPL); -- cgit v1.2.3 From b018899e620b8ee4529f43bd02e9e8e43043e33e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Nov 2022 11:45:36 -0300 Subject: perf bpf: Rename perf_include_dir to libbpf_include_dir As this is where we expect to find bpf/bpf_helpers.h, etc. This needs more work to make it follow LIBBPF_DYNAMIC=1 usage, i.e. when not using the system libbpf it should use the headers in the in-kernel sources libbpf in tools/lib/bpf. We need to do that anyway to avoid this mixup system libbpf and in-kernel files, so we'll get this sorted out that way. And this also may become moot as we move to using BPF skels for this feature. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 12 ++++++------ tools/perf/util/Build | 2 +- tools/perf/util/llvm-utils.c | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 59420676dee8..a7f6c0669fae 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1216,7 +1216,7 @@ endif # Among the variables below, these: # perfexecdir -# perf_include_dir +# libbpf_include_dir # perf_examples_dir # template_dir # mandir @@ -1240,7 +1240,7 @@ mandir = share/man infodir = share/info perfexecdir = libexec/perf-core # FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance -perf_include_dir = /usr/include +libbpf_include_dir = /usr/include perf_examples_dir = lib/perf/examples sharedir = $(prefix)/share template_dir = share/perf-core/templates @@ -1273,7 +1273,7 @@ includedir_SQ = $(subst ','\'',$(includedir)) mandir_SQ = $(subst ','\'',$(mandir)) infodir_SQ = $(subst ','\'',$(infodir)) perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) -perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir)) +libbpf_include_dir_SQ = $(subst ','\'',$(libbpf_include_dir)) perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) @@ -1285,13 +1285,13 @@ srcdir_SQ = $(subst ','\'',$(srcdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) -perf_include_instdir = $(perf_include_dir) +perf_include_instdir = $(libbpf_include_dir) perf_examples_instdir = $(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) tip_instdir = $(tipdir) else perfexec_instdir = $(prefix)/$(perfexecdir) -perf_include_instdir = $(prefix)/$(perf_include_dir) +perf_include_instdir = $(prefix)/$(libbpf_include_dir) perf_examples_instdir = $(prefix)/$(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) tip_instdir = $(prefix)/$(tipdir) @@ -1353,7 +1353,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) -$(call detected_var,perf_include_dir_SQ) +$(call detected_var,libbpf_include_dir_SQ) $(call detected_var,perf_examples_dir_SQ) $(call detected_var,tipdir_SQ) $(call detected_var,srcdir_SQ) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e315ecaec323..47a7db3ad0a1 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -220,7 +220,7 @@ perf-$(CONFIG_CXX) += c++/ perf-$(CONFIG_LIBPFM4) += pfm.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" +CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index a5cac85783d8..650ffe336f3a 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -463,7 +463,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, char *pipe_template = NULL; const char *opts = llvm_param.opts; char *command_echo = NULL, *command_out; - char *perf_include_dir = system_path(PERF_INCLUDE_DIR); + char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR); if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/", perf_include_dir) < 0) + if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0) goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", linux_version_code_str); @@ -556,7 +556,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(kbuild_dir); free(kbuild_include_opts); free(perf_bpf_include_opts); - free(perf_include_dir); + free(libbpf_include_dir); if (!p_obj_buf) free(obj_buf); @@ -572,7 +572,7 @@ errout: free(kbuild_include_opts); free(obj_buf); free(perf_bpf_include_opts); - free(perf_include_dir); + free(libbpf_include_dir); free(pipe_template); if (p_obj_buf) *p_obj_buf = NULL; -- cgit v1.2.3 From a9cd6c6766857212894dd736d9f2bc29f1416f6a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Nov 2022 16:44:52 -0300 Subject: perf trace: Add BPF augmenter to perf_event_open()'s 'struct perf_event_attr' arg Using BPF for that, doing a cleverish reuse of perf_event_attr__fprintf(), that really needs to be turned into __snprintf(), etc. But since the plan is to go the BTF way probably use libbpf's btf_dump__dump_type_data(). Example: [root@quaco ~]# perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,perf_event_open --max-events 10 perf stat --quiet sleep 0.001 fg 0.000 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 0.067 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x3, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 0.120 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5 0.172 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x2, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 7 0.190 perf_event_open(attr_uptr: { size: 128, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 8 0.199 perf_event_open(attr_uptr: { size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 9 0.204 perf_event_open(attr_uptr: { size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 10 0.210 perf_event_open(attr_uptr: { size: 128, config: 0x5, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 11 [root@quaco ~]# Suggested-by: Ian Rogers Tested-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/Y2V2Tpu+2vzJyon2@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 +- tools/perf/examples/bpf/augmented_raw_syscalls.c | 44 ++++++++++++++++++++++++ tools/perf/trace/beauty/perf_event_open.c | 44 ++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 72991528687e..5690c33c523b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1053,7 +1053,8 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "perf_event_open", - .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, + .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ }, + [2] = { .scnprintf = SCA_INT, /* cpu */ }, [3] = { .scnprintf = SCA_FD, /* group_fd */ }, [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 926238efd7d8..0599823e8ae1 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -129,6 +129,7 @@ struct augmented_args_payload { struct augmented_arg arg, arg2; }; struct sockaddr_storage saddr; + char __data[sizeof(struct augmented_arg)]; }; }; @@ -293,6 +294,49 @@ int sys_enter_renameat(struct syscall_enter_args *args) return augmented__output(args, augmented_args, len); } +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +// we need just the start, get the size to then copy it +struct perf_event_attr_size { + __u32 type; + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; +}; + +SEC("!syscalls:sys_enter_perf_event_open") +int sys_enter_perf_event_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + goto failure; + + if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) + goto failure; + + attr_read = (const struct perf_event_attr_size *)augmented_args->__data; + + __u32 size = attr_read->size; + + if (!size) + size = PERF_ATTR_SIZE_VER0; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + // Now that we read attr->size and tested it against the size limits, read it completely + if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) + goto failure; + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + static pid_t getpid(void) { return bpf_get_current_pid_tgid(); diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c index 11d47dbe63bd..01ee15fe9d0c 100644 --- a/tools/perf/trace/beauty/perf_event_open.c +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -44,3 +44,47 @@ static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, } #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags + +struct attr_fprintf_args { + size_t size, printed; + char *bf; + bool first; +}; + +static int attr__fprintf(FILE *fp __maybe_unused, const char *name, const char *val, void *priv) +{ + struct attr_fprintf_args *args = priv; + size_t printed = scnprintf(args->bf + args->printed , args->size - args->printed, "%s%s: %s", args->first ? "" : ", ", name, val); + + args->first = false; + args->printed += printed; + return printed; +} + +static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf, size_t size, bool show_zeros __maybe_unused) +{ + struct attr_fprintf_args args = { + .printed = scnprintf(bf, size, "{ "), + .size = size, + .first = true, + .bf = bf, + }; + + perf_event_attr__fprintf(stdout, attr, attr__fprintf, &args); + return args.printed + scnprintf(bf + args.printed, size - args.printed, " }"); +} + +static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size) +{ + return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros); +} + +static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_perf_event_attr(arg, bf, size); + + return scnprintf(bf, size, "%#lx", arg->val); +} + +#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr -- cgit v1.2.3 From a9dfc46c67b52ad43b8e335e28f4cf8002c67793 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 5 Nov 2022 12:01:14 +0900 Subject: perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data DWARF version 5 standard Sec 2.14 says that Any debugging information entry representing the declaration of an object, module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and DW_AT_decl_column attributes, each of whose value is an unsigned integer constant. So it should be an unsigned integer data. Also, even though the standard doesn't clearly say the DW_AT_call_file is signed or unsigned, the elfutils (eu-readelf) interprets it as unsigned integer data and it is natural to handle it as unsigned integer data as same as DW_AT_decl_file. This changes the DW_AT_call_file as unsigned integer data too. Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances") Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 30b36b525681..b07414409771 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, return 0; } -/* Get attribute and translate it as a sdata */ -static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, - Dwarf_Sword *result) -{ - Dwarf_Attribute attr; - - if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || - dwarf_formsdata(&attr, result) != 0) - return -ENOENT; - - return 0; -} - /** * die_is_signed_type - Check whether a type DIE is signed or not * @tp_die: a DIE of a type @@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) /* Get the call file index number in CU DIE */ static int die_get_call_fileno(Dwarf_Die *in_die) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) + if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) return (int)idx; else return -ENOENT; @@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) /* Get the declared file index number in CU DIE */ static int die_get_decl_fileno(Dwarf_Die *pdie) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) + if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) return (int)idx; else return -ENOENT; -- cgit v1.2.3 From 5d0557c75b2f2c7a868742d21a3ad94813ca97f4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2022 14:18:03 +0200 Subject: perf intel-pt: Start turning intel-pt-pkt-decoder-test.c into a suite of intel-pt subtests In preparation for adding more Intel PT testing, rename intel-pt-pkt-decoder-test.c to intel-pt-test.c. Subtests will later be added to intel-pt-test.c. Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221104121805.5264-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/Build | 2 +- .../arch/x86/tests/intel-pt-pkt-decoder-test.c | 315 --------------------- tools/perf/arch/x86/tests/intel-pt-test.c | 315 +++++++++++++++++++++ 3 files changed, 316 insertions(+), 316 deletions(-) delete mode 100644 tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c create mode 100644 tools/perf/arch/x86/tests/intel-pt-test.c diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 70b5bcbc15df..6f4e8636c3bf 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += sample-parsing.o -perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o +perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c deleted file mode 100644 index 42237656f453..000000000000 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ /dev/null @@ -1,315 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include - -#include "intel-pt-decoder/intel-pt-pkt-decoder.h" - -#include "debug.h" -#include "tests/tests.h" -#include "arch-tests.h" - -/** - * struct test_data - Test data. - * @len: number of bytes to decode - * @bytes: bytes to decode - * @ctx: packet context to decode - * @packet: expected packet - * @new_ctx: expected new packet context - * @ctx_unchanged: the packet context must not change - */ -static struct test_data { - int len; - u8 bytes[INTEL_PT_PKT_MAX_SZ]; - enum intel_pt_pkt_ctx ctx; - struct intel_pt_pkt packet; - enum intel_pt_pkt_ctx new_ctx; - int ctx_unchanged; -} data[] = { - /* Padding Packet */ - {1, {0}, 0, {INTEL_PT_PAD, 0, 0}, 0, 1 }, - /* Short Taken/Not Taken Packet */ - {1, {4}, 0, {INTEL_PT_TNT, 1, 0}, 0, 0 }, - {1, {6}, 0, {INTEL_PT_TNT, 1, 0x20ULL << 58}, 0, 0 }, - {1, {0x80}, 0, {INTEL_PT_TNT, 6, 0}, 0, 0 }, - {1, {0xfe}, 0, {INTEL_PT_TNT, 6, 0x3fULL << 58}, 0, 0 }, - /* Long Taken/Not Taken Packet */ - {8, {0x02, 0xa3, 2}, 0, {INTEL_PT_TNT, 1, 0xa302ULL << 47}, 0, 0 }, - {8, {0x02, 0xa3, 3}, 0, {INTEL_PT_TNT, 1, 0x1a302ULL << 47}, 0, 0 }, - {8, {0x02, 0xa3, 0, 0, 0, 0, 0, 0x80}, 0, {INTEL_PT_TNT, 47, 0xa302ULL << 1}, 0, 0 }, - {8, {0x02, 0xa3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_TNT, 47, 0xffffffffffffa302ULL << 1}, 0, 0 }, - /* Target IP Packet */ - {1, {0x0d}, 0, {INTEL_PT_TIP, 0, 0}, 0, 0 }, - {3, {0x2d, 1, 2}, 0, {INTEL_PT_TIP, 1, 0x201}, 0, 0 }, - {5, {0x4d, 1, 2, 3, 4}, 0, {INTEL_PT_TIP, 2, 0x4030201}, 0, 0 }, - {7, {0x6d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 3, 0x60504030201}, 0, 0 }, - {7, {0x8d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 4, 0x60504030201}, 0, 0 }, - {9, {0xcd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP, 6, 0x807060504030201}, 0, 0 }, - /* Packet Generation Enable */ - {1, {0x11}, 0, {INTEL_PT_TIP_PGE, 0, 0}, 0, 0 }, - {3, {0x31, 1, 2}, 0, {INTEL_PT_TIP_PGE, 1, 0x201}, 0, 0 }, - {5, {0x51, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGE, 2, 0x4030201}, 0, 0 }, - {7, {0x71, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 3, 0x60504030201}, 0, 0 }, - {7, {0x91, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 4, 0x60504030201}, 0, 0 }, - {9, {0xd1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGE, 6, 0x807060504030201}, 0, 0 }, - /* Packet Generation Disable */ - {1, {0x01}, 0, {INTEL_PT_TIP_PGD, 0, 0}, 0, 0 }, - {3, {0x21, 1, 2}, 0, {INTEL_PT_TIP_PGD, 1, 0x201}, 0, 0 }, - {5, {0x41, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGD, 2, 0x4030201}, 0, 0 }, - {7, {0x61, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 3, 0x60504030201}, 0, 0 }, - {7, {0x81, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 4, 0x60504030201}, 0, 0 }, - {9, {0xc1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGD, 6, 0x807060504030201}, 0, 0 }, - /* Flow Update Packet */ - {1, {0x1d}, 0, {INTEL_PT_FUP, 0, 0}, 0, 0 }, - {3, {0x3d, 1, 2}, 0, {INTEL_PT_FUP, 1, 0x201}, 0, 0 }, - {5, {0x5d, 1, 2, 3, 4}, 0, {INTEL_PT_FUP, 2, 0x4030201}, 0, 0 }, - {7, {0x7d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 3, 0x60504030201}, 0, 0 }, - {7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 }, - {9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 }, - /* Paging Information Packet */ - {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060402}, 0, 0 }, - {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060403}, 0, 0 }, - /* Mode Exec Packet */ - {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 }, - {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 1, 64}, 0, 0 }, - {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 2, 32}, 0, 0 }, - {2, {0x99, 0x04}, 0, {INTEL_PT_MODE_EXEC, 4, 16}, 0, 0 }, - {2, {0x99, 0x05}, 0, {INTEL_PT_MODE_EXEC, 5, 64}, 0, 0 }, - {2, {0x99, 0x06}, 0, {INTEL_PT_MODE_EXEC, 6, 32}, 0, 0 }, - /* Mode TSX Packet */ - {2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 }, - {2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 }, - {2, {0x99, 0x22}, 0, {INTEL_PT_MODE_TSX, 0, 2}, 0, 0 }, - /* Trace Stop Packet */ - {2, {0x02, 0x83}, 0, {INTEL_PT_TRACESTOP, 0, 0}, 0, 0 }, - /* Core:Bus Ratio Packet */ - {4, {0x02, 0x03, 0x12, 0}, 0, {INTEL_PT_CBR, 0, 0x12}, 0, 1 }, - /* Timestamp Counter Packet */ - {8, {0x19, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_TSC, 0, 0x7060504030201}, 0, 1 }, - /* Mini Time Counter Packet */ - {2, {0x59, 0x12}, 0, {INTEL_PT_MTC, 0, 0x12}, 0, 1 }, - /* TSC / MTC Alignment Packet */ - {7, {0x02, 0x73}, 0, {INTEL_PT_TMA, 0, 0}, 0, 1 }, - {7, {0x02, 0x73, 1, 2}, 0, {INTEL_PT_TMA, 0, 0x201}, 0, 1 }, - {7, {0x02, 0x73, 0, 0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0}, 0, 1 }, - {7, {0x02, 0x73, 0x80, 0xc0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0xc080}, 0, 1 }, - /* Cycle Count Packet */ - {1, {0x03}, 0, {INTEL_PT_CYC, 0, 0}, 0, 1 }, - {1, {0x0b}, 0, {INTEL_PT_CYC, 0, 1}, 0, 1 }, - {1, {0xfb}, 0, {INTEL_PT_CYC, 0, 0x1f}, 0, 1 }, - {2, {0x07, 2}, 0, {INTEL_PT_CYC, 0, 0x20}, 0, 1 }, - {2, {0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xfff}, 0, 1 }, - {3, {0x07, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x1000}, 0, 1 }, - {3, {0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7ffff}, 0, 1 }, - {4, {0x07, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x80000}, 0, 1 }, - {4, {0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3ffffff}, 0, 1 }, - {5, {0x07, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x4000000}, 0, 1 }, - {5, {0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1ffffffff}, 0, 1 }, - {6, {0x07, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x200000000}, 0, 1 }, - {6, {0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xffffffffff}, 0, 1 }, - {7, {0x07, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x10000000000}, 0, 1 }, - {7, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7fffffffffff}, 0, 1 }, - {8, {0x07, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x800000000000}, 0, 1 }, - {8, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3fffffffffffff}, 0, 1 }, - {9, {0x07, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x40000000000000}, 0, 1 }, - {9, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1fffffffffffffff}, 0, 1 }, - {10, {0x07, 1, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x2000000000000000}, 0, 1 }, - {10, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe}, 0, {INTEL_PT_CYC, 0, 0xffffffffffffffff}, 0, 1 }, - /* Virtual-Machine Control Structure Packet */ - {7, {0x02, 0xc8, 1, 2, 3, 4, 5}, 0, {INTEL_PT_VMCS, 5, 0x504030201}, 0, 0 }, - /* Overflow Packet */ - {2, {0x02, 0xf3}, 0, {INTEL_PT_OVF, 0, 0}, 0, 0 }, - {2, {0x02, 0xf3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, - {2, {0x02, 0xf3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, - /* Packet Stream Boundary*/ - {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, 0, {INTEL_PT_PSB, 0, 0}, 0, 0 }, - {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_4_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, - {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_8_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, - /* PSB End Packet */ - {2, {0x02, 0x23}, 0, {INTEL_PT_PSBEND, 0, 0}, 0, 0 }, - /* Maintenance Packet */ - {11, {0x02, 0xc3, 0x88, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_MNT, 0, 0x7060504030201}, 0, 1 }, - /* Write Data to PT Packet */ - {6, {0x02, 0x12, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE, 0, 0x4030201}, 0, 0 }, - {10, {0x02, 0x32, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE, 1, 0x807060504030201}, 0, 0 }, - {6, {0x02, 0x92, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE_IP, 0, 0x4030201}, 0, 0 }, - {10, {0x02, 0xb2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE_IP, 1, 0x807060504030201}, 0, 0 }, - /* Execution Stop Packet */ - {2, {0x02, 0x62}, 0, {INTEL_PT_EXSTOP, 0, 0}, 0, 1 }, - {2, {0x02, 0xe2}, 0, {INTEL_PT_EXSTOP_IP, 0, 0}, 0, 1 }, - /* Monitor Wait Packet */ - {10, {0x02, 0xc2}, 0, {INTEL_PT_MWAIT, 0, 0}, 0, 0 }, - {10, {0x02, 0xc2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x807060504030201}, 0, 0 }, - {10, {0x02, 0xc2, 0xff, 2, 3, 4, 7, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x8070607040302ff}, 0, 0 }, - /* Power Entry Packet */ - {4, {0x02, 0x22}, 0, {INTEL_PT_PWRE, 0, 0}, 0, 1 }, - {4, {0x02, 0x22, 1, 2}, 0, {INTEL_PT_PWRE, 0, 0x0201}, 0, 1 }, - {4, {0x02, 0x22, 0x80, 0x34}, 0, {INTEL_PT_PWRE, 0, 0x3480}, 0, 1 }, - {4, {0x02, 0x22, 0x00, 0x56}, 0, {INTEL_PT_PWRE, 0, 0x5600}, 0, 1 }, - /* Power Exit Packet */ - {7, {0x02, 0xa2}, 0, {INTEL_PT_PWRX, 0, 0}, 0, 1 }, - {7, {0x02, 0xa2, 1, 2, 3, 4, 5}, 0, {INTEL_PT_PWRX, 0, 0x504030201}, 0, 1 }, - {7, {0x02, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_PWRX, 0, 0xffffffffff}, 0, 1 }, - /* Block Begin Packet */ - {3, {0x02, 0x63, 0x00}, 0, {INTEL_PT_BBP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, - {3, {0x02, 0x63, 0x80}, 0, {INTEL_PT_BBP, 1, 0}, INTEL_PT_BLK_4_CTX, 0 }, - {3, {0x02, 0x63, 0x1f}, 0, {INTEL_PT_BBP, 0, 0x1f}, INTEL_PT_BLK_8_CTX, 0 }, - {3, {0x02, 0x63, 0x9f}, 0, {INTEL_PT_BBP, 1, 0x1f}, INTEL_PT_BLK_4_CTX, 0 }, - /* 4-byte Block Item Packet */ - {5, {0x04}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_4_CTX, 0 }, - {5, {0xfc}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_4_CTX, 0 }, - {5, {0x04, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, - {5, {0xfc, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, - /* 8-byte Block Item Packet */ - {9, {0x04}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, - {9, {0xfc}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_8_CTX, 0 }, - {9, {0x04, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, - {9, {0xfc, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, - /* Block End Packet */ - {2, {0x02, 0x33}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, - {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, - {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, - {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, - /* Control Flow Event Packet */ - {4, {0x02, 0x13, 0x01, 0x03}, 0, {INTEL_PT_CFE, 1, 3}, 0, 0 }, - {4, {0x02, 0x13, 0x81, 0x03}, 0, {INTEL_PT_CFE_IP, 1, 3}, 0, 0 }, - {4, {0x02, 0x13, 0x1f, 0x00}, 0, {INTEL_PT_CFE, 0x1f, 0}, 0, 0 }, - {4, {0x02, 0x13, 0x9f, 0xff}, 0, {INTEL_PT_CFE_IP, 0x1f, 0xff}, 0, 0 }, - /* */ - {11, {0x02, 0x53, 0x09, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_EVD, 0x09, 0x7060504030201}, 0, 0 }, - {11, {0x02, 0x53, 0x3f, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_EVD, 0x3f, 0x8070605040302}, 0, 0 }, - /* Terminator */ - {0, {0}, 0, {0, 0, 0}, 0, 0 }, -}; - -static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len) -{ - char desc[INTEL_PT_PKT_DESC_MAX]; - int ret, i; - - for (i = 0; i < len; i++) - pr_debug(" %02x", bytes[i]); - for (; i < INTEL_PT_PKT_MAX_SZ; i++) - pr_debug(" "); - pr_debug(" "); - ret = intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); - if (ret < 0) { - pr_debug("intel_pt_pkt_desc failed!\n"); - return TEST_FAIL; - } - pr_debug("%s\n", desc); - - return TEST_OK; -} - -static void decoding_failed(struct test_data *d) -{ - pr_debug("Decoding failed!\n"); - pr_debug("Decoding: "); - dump_packet(&d->packet, d->bytes, d->len); -} - -static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len, - enum intel_pt_pkt_ctx new_ctx) -{ - decoding_failed(d); - - if (len != d->len) - pr_debug("Expected length: %d Decoded length %d\n", - d->len, len); - - if (packet->type != d->packet.type) - pr_debug("Expected type: %d Decoded type %d\n", - d->packet.type, packet->type); - - if (packet->count != d->packet.count) - pr_debug("Expected count: %d Decoded count %d\n", - d->packet.count, packet->count); - - if (packet->payload != d->packet.payload) - pr_debug("Expected payload: 0x%llx Decoded payload 0x%llx\n", - (unsigned long long)d->packet.payload, - (unsigned long long)packet->payload); - - if (new_ctx != d->new_ctx) - pr_debug("Expected packet context: %d Decoded packet context %d\n", - d->new_ctx, new_ctx); - - return TEST_FAIL; -} - -static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet, - enum intel_pt_pkt_ctx ctx) -{ - enum intel_pt_pkt_ctx old_ctx = ctx; - - intel_pt_upd_pkt_ctx(packet, &ctx); - - if (ctx != old_ctx) { - decoding_failed(d); - pr_debug("Packet context changed!\n"); - return TEST_FAIL; - } - - return TEST_OK; -} - -static int test_one(struct test_data *d) -{ - struct intel_pt_pkt packet; - enum intel_pt_pkt_ctx ctx = d->ctx; - int ret; - - memset(&packet, 0xff, sizeof(packet)); - - /* Decode a packet */ - ret = intel_pt_get_packet(d->bytes, d->len, &packet, &ctx); - if (ret < 0 || ret > INTEL_PT_PKT_MAX_SZ) { - decoding_failed(d); - pr_debug("intel_pt_get_packet returned %d\n", ret); - return TEST_FAIL; - } - - /* Some packets must always leave the packet context unchanged */ - if (d->ctx_unchanged) { - int err; - - err = test_ctx_unchanged(d, &packet, INTEL_PT_NO_CTX); - if (err) - return err; - err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_4_CTX); - if (err) - return err; - err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_8_CTX); - if (err) - return err; - } - - /* Compare to the expected values */ - if (ret != d->len || packet.type != d->packet.type || - packet.count != d->packet.count || - packet.payload != d->packet.payload || ctx != d->new_ctx) - return fail(d, &packet, ret, ctx); - - pr_debug("Decoded ok:"); - ret = dump_packet(&d->packet, d->bytes, d->len); - - return ret; -} - -/* - * This test feeds byte sequences to the Intel PT packet decoder and checks the - * results. Changes to the packet context are also checked. - */ -int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct test_data *d = data; - int ret; - - for (d = data; d->len; d++) { - ret = test_one(d); - if (ret) - return ret; - } - - return TEST_OK; -} diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c new file mode 100644 index 000000000000..42237656f453 --- /dev/null +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "intel-pt-decoder/intel-pt-pkt-decoder.h" + +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +/** + * struct test_data - Test data. + * @len: number of bytes to decode + * @bytes: bytes to decode + * @ctx: packet context to decode + * @packet: expected packet + * @new_ctx: expected new packet context + * @ctx_unchanged: the packet context must not change + */ +static struct test_data { + int len; + u8 bytes[INTEL_PT_PKT_MAX_SZ]; + enum intel_pt_pkt_ctx ctx; + struct intel_pt_pkt packet; + enum intel_pt_pkt_ctx new_ctx; + int ctx_unchanged; +} data[] = { + /* Padding Packet */ + {1, {0}, 0, {INTEL_PT_PAD, 0, 0}, 0, 1 }, + /* Short Taken/Not Taken Packet */ + {1, {4}, 0, {INTEL_PT_TNT, 1, 0}, 0, 0 }, + {1, {6}, 0, {INTEL_PT_TNT, 1, 0x20ULL << 58}, 0, 0 }, + {1, {0x80}, 0, {INTEL_PT_TNT, 6, 0}, 0, 0 }, + {1, {0xfe}, 0, {INTEL_PT_TNT, 6, 0x3fULL << 58}, 0, 0 }, + /* Long Taken/Not Taken Packet */ + {8, {0x02, 0xa3, 2}, 0, {INTEL_PT_TNT, 1, 0xa302ULL << 47}, 0, 0 }, + {8, {0x02, 0xa3, 3}, 0, {INTEL_PT_TNT, 1, 0x1a302ULL << 47}, 0, 0 }, + {8, {0x02, 0xa3, 0, 0, 0, 0, 0, 0x80}, 0, {INTEL_PT_TNT, 47, 0xa302ULL << 1}, 0, 0 }, + {8, {0x02, 0xa3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_TNT, 47, 0xffffffffffffa302ULL << 1}, 0, 0 }, + /* Target IP Packet */ + {1, {0x0d}, 0, {INTEL_PT_TIP, 0, 0}, 0, 0 }, + {3, {0x2d, 1, 2}, 0, {INTEL_PT_TIP, 1, 0x201}, 0, 0 }, + {5, {0x4d, 1, 2, 3, 4}, 0, {INTEL_PT_TIP, 2, 0x4030201}, 0, 0 }, + {7, {0x6d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 3, 0x60504030201}, 0, 0 }, + {7, {0x8d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 4, 0x60504030201}, 0, 0 }, + {9, {0xcd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP, 6, 0x807060504030201}, 0, 0 }, + /* Packet Generation Enable */ + {1, {0x11}, 0, {INTEL_PT_TIP_PGE, 0, 0}, 0, 0 }, + {3, {0x31, 1, 2}, 0, {INTEL_PT_TIP_PGE, 1, 0x201}, 0, 0 }, + {5, {0x51, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGE, 2, 0x4030201}, 0, 0 }, + {7, {0x71, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 3, 0x60504030201}, 0, 0 }, + {7, {0x91, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 4, 0x60504030201}, 0, 0 }, + {9, {0xd1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGE, 6, 0x807060504030201}, 0, 0 }, + /* Packet Generation Disable */ + {1, {0x01}, 0, {INTEL_PT_TIP_PGD, 0, 0}, 0, 0 }, + {3, {0x21, 1, 2}, 0, {INTEL_PT_TIP_PGD, 1, 0x201}, 0, 0 }, + {5, {0x41, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGD, 2, 0x4030201}, 0, 0 }, + {7, {0x61, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 3, 0x60504030201}, 0, 0 }, + {7, {0x81, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 4, 0x60504030201}, 0, 0 }, + {9, {0xc1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGD, 6, 0x807060504030201}, 0, 0 }, + /* Flow Update Packet */ + {1, {0x1d}, 0, {INTEL_PT_FUP, 0, 0}, 0, 0 }, + {3, {0x3d, 1, 2}, 0, {INTEL_PT_FUP, 1, 0x201}, 0, 0 }, + {5, {0x5d, 1, 2, 3, 4}, 0, {INTEL_PT_FUP, 2, 0x4030201}, 0, 0 }, + {7, {0x7d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 3, 0x60504030201}, 0, 0 }, + {7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 }, + {9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 }, + /* Paging Information Packet */ + {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060402}, 0, 0 }, + {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060403}, 0, 0 }, + /* Mode Exec Packet */ + {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 }, + {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 1, 64}, 0, 0 }, + {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 2, 32}, 0, 0 }, + {2, {0x99, 0x04}, 0, {INTEL_PT_MODE_EXEC, 4, 16}, 0, 0 }, + {2, {0x99, 0x05}, 0, {INTEL_PT_MODE_EXEC, 5, 64}, 0, 0 }, + {2, {0x99, 0x06}, 0, {INTEL_PT_MODE_EXEC, 6, 32}, 0, 0 }, + /* Mode TSX Packet */ + {2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 }, + {2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 }, + {2, {0x99, 0x22}, 0, {INTEL_PT_MODE_TSX, 0, 2}, 0, 0 }, + /* Trace Stop Packet */ + {2, {0x02, 0x83}, 0, {INTEL_PT_TRACESTOP, 0, 0}, 0, 0 }, + /* Core:Bus Ratio Packet */ + {4, {0x02, 0x03, 0x12, 0}, 0, {INTEL_PT_CBR, 0, 0x12}, 0, 1 }, + /* Timestamp Counter Packet */ + {8, {0x19, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_TSC, 0, 0x7060504030201}, 0, 1 }, + /* Mini Time Counter Packet */ + {2, {0x59, 0x12}, 0, {INTEL_PT_MTC, 0, 0x12}, 0, 1 }, + /* TSC / MTC Alignment Packet */ + {7, {0x02, 0x73}, 0, {INTEL_PT_TMA, 0, 0}, 0, 1 }, + {7, {0x02, 0x73, 1, 2}, 0, {INTEL_PT_TMA, 0, 0x201}, 0, 1 }, + {7, {0x02, 0x73, 0, 0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0}, 0, 1 }, + {7, {0x02, 0x73, 0x80, 0xc0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0xc080}, 0, 1 }, + /* Cycle Count Packet */ + {1, {0x03}, 0, {INTEL_PT_CYC, 0, 0}, 0, 1 }, + {1, {0x0b}, 0, {INTEL_PT_CYC, 0, 1}, 0, 1 }, + {1, {0xfb}, 0, {INTEL_PT_CYC, 0, 0x1f}, 0, 1 }, + {2, {0x07, 2}, 0, {INTEL_PT_CYC, 0, 0x20}, 0, 1 }, + {2, {0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xfff}, 0, 1 }, + {3, {0x07, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x1000}, 0, 1 }, + {3, {0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7ffff}, 0, 1 }, + {4, {0x07, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x80000}, 0, 1 }, + {4, {0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3ffffff}, 0, 1 }, + {5, {0x07, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x4000000}, 0, 1 }, + {5, {0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1ffffffff}, 0, 1 }, + {6, {0x07, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x200000000}, 0, 1 }, + {6, {0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xffffffffff}, 0, 1 }, + {7, {0x07, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x10000000000}, 0, 1 }, + {7, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7fffffffffff}, 0, 1 }, + {8, {0x07, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x800000000000}, 0, 1 }, + {8, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3fffffffffffff}, 0, 1 }, + {9, {0x07, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x40000000000000}, 0, 1 }, + {9, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1fffffffffffffff}, 0, 1 }, + {10, {0x07, 1, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x2000000000000000}, 0, 1 }, + {10, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe}, 0, {INTEL_PT_CYC, 0, 0xffffffffffffffff}, 0, 1 }, + /* Virtual-Machine Control Structure Packet */ + {7, {0x02, 0xc8, 1, 2, 3, 4, 5}, 0, {INTEL_PT_VMCS, 5, 0x504030201}, 0, 0 }, + /* Overflow Packet */ + {2, {0x02, 0xf3}, 0, {INTEL_PT_OVF, 0, 0}, 0, 0 }, + {2, {0x02, 0xf3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, + {2, {0x02, 0xf3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, + /* Packet Stream Boundary*/ + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, 0, {INTEL_PT_PSB, 0, 0}, 0, 0 }, + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_4_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_8_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, + /* PSB End Packet */ + {2, {0x02, 0x23}, 0, {INTEL_PT_PSBEND, 0, 0}, 0, 0 }, + /* Maintenance Packet */ + {11, {0x02, 0xc3, 0x88, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_MNT, 0, 0x7060504030201}, 0, 1 }, + /* Write Data to PT Packet */ + {6, {0x02, 0x12, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE, 0, 0x4030201}, 0, 0 }, + {10, {0x02, 0x32, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE, 1, 0x807060504030201}, 0, 0 }, + {6, {0x02, 0x92, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE_IP, 0, 0x4030201}, 0, 0 }, + {10, {0x02, 0xb2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE_IP, 1, 0x807060504030201}, 0, 0 }, + /* Execution Stop Packet */ + {2, {0x02, 0x62}, 0, {INTEL_PT_EXSTOP, 0, 0}, 0, 1 }, + {2, {0x02, 0xe2}, 0, {INTEL_PT_EXSTOP_IP, 0, 0}, 0, 1 }, + /* Monitor Wait Packet */ + {10, {0x02, 0xc2}, 0, {INTEL_PT_MWAIT, 0, 0}, 0, 0 }, + {10, {0x02, 0xc2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x807060504030201}, 0, 0 }, + {10, {0x02, 0xc2, 0xff, 2, 3, 4, 7, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x8070607040302ff}, 0, 0 }, + /* Power Entry Packet */ + {4, {0x02, 0x22}, 0, {INTEL_PT_PWRE, 0, 0}, 0, 1 }, + {4, {0x02, 0x22, 1, 2}, 0, {INTEL_PT_PWRE, 0, 0x0201}, 0, 1 }, + {4, {0x02, 0x22, 0x80, 0x34}, 0, {INTEL_PT_PWRE, 0, 0x3480}, 0, 1 }, + {4, {0x02, 0x22, 0x00, 0x56}, 0, {INTEL_PT_PWRE, 0, 0x5600}, 0, 1 }, + /* Power Exit Packet */ + {7, {0x02, 0xa2}, 0, {INTEL_PT_PWRX, 0, 0}, 0, 1 }, + {7, {0x02, 0xa2, 1, 2, 3, 4, 5}, 0, {INTEL_PT_PWRX, 0, 0x504030201}, 0, 1 }, + {7, {0x02, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_PWRX, 0, 0xffffffffff}, 0, 1 }, + /* Block Begin Packet */ + {3, {0x02, 0x63, 0x00}, 0, {INTEL_PT_BBP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, + {3, {0x02, 0x63, 0x80}, 0, {INTEL_PT_BBP, 1, 0}, INTEL_PT_BLK_4_CTX, 0 }, + {3, {0x02, 0x63, 0x1f}, 0, {INTEL_PT_BBP, 0, 0x1f}, INTEL_PT_BLK_8_CTX, 0 }, + {3, {0x02, 0x63, 0x9f}, 0, {INTEL_PT_BBP, 1, 0x1f}, INTEL_PT_BLK_4_CTX, 0 }, + /* 4-byte Block Item Packet */ + {5, {0x04}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_4_CTX, 0 }, + {5, {0xfc}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_4_CTX, 0 }, + {5, {0x04, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, + {5, {0xfc, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, + /* 8-byte Block Item Packet */ + {9, {0x04}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, + {9, {0xfc}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_8_CTX, 0 }, + {9, {0x04, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, + {9, {0xfc, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, + /* Block End Packet */ + {2, {0x02, 0x33}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, + {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, + {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, + {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, + /* Control Flow Event Packet */ + {4, {0x02, 0x13, 0x01, 0x03}, 0, {INTEL_PT_CFE, 1, 3}, 0, 0 }, + {4, {0x02, 0x13, 0x81, 0x03}, 0, {INTEL_PT_CFE_IP, 1, 3}, 0, 0 }, + {4, {0x02, 0x13, 0x1f, 0x00}, 0, {INTEL_PT_CFE, 0x1f, 0}, 0, 0 }, + {4, {0x02, 0x13, 0x9f, 0xff}, 0, {INTEL_PT_CFE_IP, 0x1f, 0xff}, 0, 0 }, + /* */ + {11, {0x02, 0x53, 0x09, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_EVD, 0x09, 0x7060504030201}, 0, 0 }, + {11, {0x02, 0x53, 0x3f, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_EVD, 0x3f, 0x8070605040302}, 0, 0 }, + /* Terminator */ + {0, {0}, 0, {0, 0, 0}, 0, 0 }, +}; + +static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len) +{ + char desc[INTEL_PT_PKT_DESC_MAX]; + int ret, i; + + for (i = 0; i < len; i++) + pr_debug(" %02x", bytes[i]); + for (; i < INTEL_PT_PKT_MAX_SZ; i++) + pr_debug(" "); + pr_debug(" "); + ret = intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); + if (ret < 0) { + pr_debug("intel_pt_pkt_desc failed!\n"); + return TEST_FAIL; + } + pr_debug("%s\n", desc); + + return TEST_OK; +} + +static void decoding_failed(struct test_data *d) +{ + pr_debug("Decoding failed!\n"); + pr_debug("Decoding: "); + dump_packet(&d->packet, d->bytes, d->len); +} + +static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len, + enum intel_pt_pkt_ctx new_ctx) +{ + decoding_failed(d); + + if (len != d->len) + pr_debug("Expected length: %d Decoded length %d\n", + d->len, len); + + if (packet->type != d->packet.type) + pr_debug("Expected type: %d Decoded type %d\n", + d->packet.type, packet->type); + + if (packet->count != d->packet.count) + pr_debug("Expected count: %d Decoded count %d\n", + d->packet.count, packet->count); + + if (packet->payload != d->packet.payload) + pr_debug("Expected payload: 0x%llx Decoded payload 0x%llx\n", + (unsigned long long)d->packet.payload, + (unsigned long long)packet->payload); + + if (new_ctx != d->new_ctx) + pr_debug("Expected packet context: %d Decoded packet context %d\n", + d->new_ctx, new_ctx); + + return TEST_FAIL; +} + +static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet, + enum intel_pt_pkt_ctx ctx) +{ + enum intel_pt_pkt_ctx old_ctx = ctx; + + intel_pt_upd_pkt_ctx(packet, &ctx); + + if (ctx != old_ctx) { + decoding_failed(d); + pr_debug("Packet context changed!\n"); + return TEST_FAIL; + } + + return TEST_OK; +} + +static int test_one(struct test_data *d) +{ + struct intel_pt_pkt packet; + enum intel_pt_pkt_ctx ctx = d->ctx; + int ret; + + memset(&packet, 0xff, sizeof(packet)); + + /* Decode a packet */ + ret = intel_pt_get_packet(d->bytes, d->len, &packet, &ctx); + if (ret < 0 || ret > INTEL_PT_PKT_MAX_SZ) { + decoding_failed(d); + pr_debug("intel_pt_get_packet returned %d\n", ret); + return TEST_FAIL; + } + + /* Some packets must always leave the packet context unchanged */ + if (d->ctx_unchanged) { + int err; + + err = test_ctx_unchanged(d, &packet, INTEL_PT_NO_CTX); + if (err) + return err; + err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_4_CTX); + if (err) + return err; + err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_8_CTX); + if (err) + return err; + } + + /* Compare to the expected values */ + if (ret != d->len || packet.type != d->packet.type || + packet.count != d->packet.count || + packet.payload != d->packet.payload || ctx != d->new_ctx) + return fail(d, &packet, ret, ctx); + + pr_debug("Decoded ok:"); + ret = dump_packet(&d->packet, d->bytes, d->len); + + return ret; +} + +/* + * This test feeds byte sequences to the Intel PT packet decoder and checks the + * results. Changes to the packet context are also checked. + */ +int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + struct test_data *d = data; + int ret; + + for (d = data; d->len; d++) { + ret = test_one(d); + if (ret) + return ret; + } + + return TEST_OK; +} -- cgit v1.2.3 From 828143f8da2856014df3102f63f7e4e4dc2d1c22 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2022 14:18:04 +0200 Subject: perf intel-pt: Redefine test_suite to allow for adding more subtests In preparation for adding more Intel PT testing, redefine the test_suite to allow for adding more subtests. Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221104121805.5264-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/arch-tests.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 04018b8aa85b..8d5e4a0831d5 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -5,7 +5,17 @@ #ifdef HAVE_AUXTRACE_SUPPORT DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); -DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder); + +static struct test_case intel_pt_tests[] = { + TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder), + { .name = NULL, } +}; + +struct test_suite suite__intel_pt = { + .desc = "Intel PT packet decoder", + .test_cases = intel_pt_tests, +}; + #endif #if defined(__x86_64__) DEFINE_SUITE("x86 bp modify", bp_modify); @@ -18,7 +28,7 @@ struct test_suite *arch_tests[] = { #endif #ifdef HAVE_AUXTRACE_SUPPORT &suite__insn_x86, - &suite__intel_pt_pkt_decoder, + &suite__intel_pt, #endif #if defined(__x86_64__) &suite__bp_modify, -- cgit v1.2.3 From 44a037f54b97e4215a282d39d0f7f28c588f185c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2022 14:18:05 +0200 Subject: perf intel-pt: Add hybrid CPU compatibility test The kernel driver assumes hybrid CPUs will have Intel PT capabilities that are compatible with the boot CPU. Add a test to check that is the case. Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221104121805.5264-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/arch-tests.c | 3 +- tools/perf/arch/x86/tests/intel-pt-test.c | 154 ++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a1a1b3c0827..902e9ea9b99e 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -8,6 +8,7 @@ struct test_suite; int test__rdpmc(struct test_suite *test, int subtest); int test__insn_x86(struct test_suite *test, int subtest); int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); +int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest); int test__bp_modify(struct test_suite *test, int subtest); int test__x86_sample_parsing(struct test_suite *test, int subtest); diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 8d5e4a0831d5..aae6ea0fe52b 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -8,11 +8,12 @@ DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); static struct test_case intel_pt_tests[] = { TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder), + TEST_CASE("Intel PT hybrid CPU compatibility", intel_pt_hybrid_compat), { .name = NULL, } }; struct test_suite suite__intel_pt = { - .desc = "Intel PT packet decoder", + .desc = "Intel PT", .test_cases = intel_pt_tests, }; diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c index 42237656f453..70b7f79396b1 100644 --- a/tools/perf/arch/x86/tests/intel-pt-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include +#include +#include #include "intel-pt-decoder/intel-pt-pkt-decoder.h" #include "debug.h" #include "tests/tests.h" #include "arch-tests.h" +#include "cpumap.h" /** * struct test_data - Test data. @@ -313,3 +318,152 @@ int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subte return TEST_OK; } + +static int setaffinity(int cpu) +{ + cpu_set_t cpu_set; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set)) { + pr_debug("sched_setaffinity() failed for CPU %d\n", cpu); + return -1; + } + return 0; +} + +#define INTEL_PT_ADDR_FILT_CNT_MASK GENMASK(2, 0) +#define INTEL_PT_SUBLEAF_CNT 2 +#define CPUID_REG_CNT 4 + +struct cpuid_result { + union { + struct { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + }; + unsigned int reg[CPUID_REG_CNT]; + }; +}; + +struct pt_caps { + struct cpuid_result subleaf[INTEL_PT_SUBLEAF_CNT]; +}; + +static int get_pt_caps(int cpu, struct pt_caps *caps) +{ + struct cpuid_result r; + int i; + + if (setaffinity(cpu)) + return -1; + + memset(caps, 0, sizeof(*caps)); + + for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) { + __get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx); + pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i); + pr_debug("eax = 0x%08x\n", r.eax); + pr_debug("ebx = 0x%08x\n", r.ebx); + pr_debug("ecx = 0x%08x\n", r.ecx); + pr_debug("edx = 0x%08x\n", r.edx); + caps->subleaf[i] = r; + } + + return 0; +} + +static bool is_hydrid(void) +{ + unsigned int eax, ebx, ecx, edx = 0; + bool result; + + __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); + result = edx & BIT(15); + pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n", + result ? "" : "not ", edx); + return result; +} + +static int compare_caps(int cpu, struct pt_caps *caps, struct pt_caps *caps0) +{ + struct pt_caps mask = { /* Mask of bits to check*/ + .subleaf = { + [0] = { + .ebx = GENMASK(8, 0), + .ecx = GENMASK(3, 0), + }, + [1] = { + .eax = GENMASK(31, 16), + .ebx = GENMASK(31, 0), + } + } + }; + unsigned int m, reg, reg0; + int ret = 0; + int i, j; + + for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) { + for (j = 0; j < CPUID_REG_CNT; j++) { + m = mask.subleaf[i].reg[j]; + reg = m & caps->subleaf[i].reg[j]; + reg0 = m & caps0->subleaf[i].reg[j]; + if ((reg & reg0) != reg0) { + pr_debug("CPU %d subleaf %d reg %d FAIL %#x vs %#x\n", + cpu, i, j, reg, reg0); + ret = -1; + } + } + } + + m = INTEL_PT_ADDR_FILT_CNT_MASK; + reg = m & caps->subleaf[1].eax; + reg0 = m & caps0->subleaf[1].eax; + if (reg < reg0) { + pr_debug("CPU %d subleaf 1 reg 0 FAIL address filter count %#x vs %#x\n", + cpu, reg, reg0); + ret = -1; + } + + if (!ret) + pr_debug("CPU %d OK\n", cpu); + + return ret; +} + +int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest) +{ + int max_cpu = cpu__max_cpu().cpu; + struct pt_caps last_caps; + struct pt_caps caps0; + int ret = TEST_OK; + int cpu; + + if (!is_hydrid()) { + test->test_cases[subtest].skip_reason = "not hybrid"; + return TEST_SKIP; + } + + if (get_pt_caps(0, &caps0)) + return TEST_FAIL; + + for (cpu = 1, last_caps = caps0; cpu < max_cpu; cpu++) { + struct pt_caps caps; + + if (get_pt_caps(cpu, &caps)) { + pr_debug("CPU %d not found\n", cpu); + continue; + } + if (!memcmp(&caps, &last_caps, sizeof(caps))) { + pr_debug("CPU %d same caps as previous CPU\n", cpu); + continue; + } + if (compare_caps(cpu, &caps, &caps0)) + ret = TEST_FAIL; + last_caps = caps; + } + + return ret; +} -- cgit v1.2.3 From 6ac73820993c13f30d226f9521f8ffae62acdf42 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 10 Nov 2022 15:30:10 -0300 Subject: perf trace: Add augmenter for clock_gettime's rqtp timespec arg One more before going the BTF way: # perf trace -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o,*nanosleep ? pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 ? gpm/1042 ... [continued]: clock_nanosleep()) = 0 1.232 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ... 1.232 pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 327.329 gpm/1042 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffddfd1cf20) ... 1002.482 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) = 0 327.329 gpm/1042 ... [continued]: clock_nanosleep()) = 0 2003.947 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ... 2003.947 pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 2327.858 gpm/1042 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffddfd1cf20) ... ? crond/1384 ... [continued]: clock_nanosleep()) = 0 3005.382 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ... 3005.382 pool-gsd-smart/2893 ... [continued]: clock_nanosleep()) = 0 3675.633 crond/1384 clock_nanosleep(rqtp: { .tv_sec: 60, .tv_nsec: 0 }, rmtp: 0x7ffcc02b66b0) ... ^C# Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 ++ tools/perf/examples/bpf/augmented_raw_syscalls.c | 28 ++++++++++++++++++++++++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/timespec.c | 21 ++++++++++++++++++ 5 files changed, 55 insertions(+) create mode 100644 tools/perf/trace/beauty/timespec.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5690c33c523b..c93b359abc31 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -924,6 +924,8 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, }, { .name = "clock_gettime", .arg = { [0] = STRARRAY(clk_id, clockid), }, }, + { .name = "clock_nanosleep", + .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, }, { .name = "clone", .errpid = true, .nr_args = 5, .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 0599823e8ae1..7dc24c9173a7 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -21,6 +21,13 @@ // FIXME: These should come from system headers typedef char bool; typedef int pid_t; +typedef long long int __s64; +typedef __s64 time64_t; + +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; +}; /* bpf-output associated map */ struct __augmented_syscalls__ { @@ -337,6 +344,27 @@ failure: return 1; /* Failure: don't filter */ } +SEC("!syscalls:sys_enter_clock_nanosleep") +int sys_enter_clock_nanosleep(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *rqtp_arg = (const void *)args->args[2]; + unsigned int len = sizeof(augmented_args->args); + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + bpf_probe_read(&augmented_args->__data, size, rqtp_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + static pid_t getpid(void) { return bpf_get_current_pid_tgid(); diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 433dc39053a7..d11ce256f511 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -17,4 +17,5 @@ perf-y += sockaddr.o perf-y += socket.o perf-y += statx.o perf-y += sync_file_range.o +perf-y += timespec.o perf-y += tracepoints/ diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index f527a46ab4e7..4c59edddd6a8 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -244,6 +244,9 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags +size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_TIMESPEC syscall_arg__scnprintf_timespec + size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, diff --git a/tools/perf/trace/beauty/timespec.c b/tools/perf/trace/beauty/timespec.c new file mode 100644 index 000000000000..e1a61f092aad --- /dev/null +++ b/tools/perf/trace/beauty/timespec.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Copyright (C) 2022, Red Hat Inc, Arnaldo Carvalho de Melo + +#include "trace/beauty/beauty.h" +#include +#include + +static size_t syscall_arg__scnprintf_augmented_timespec(struct syscall_arg *arg, char *bf, size_t size) +{ + struct timespec *ts = (struct timespec *)arg->augmented.args; + + return scnprintf(bf, size, "{ .tv_sec: %" PRIu64 ", .tv_nsec: %" PRIu64 " }", ts->tv_sec, ts->tv_nsec); +} + +size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_timespec(arg, bf, size); + + return scnprintf(bf, size, "%#lx", arg->val); +} -- cgit v1.2.3 From 30b331d2e3bc5c7c95568477d4bf2661b6e6cb3e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 3 Nov 2022 22:14:40 -0700 Subject: perf lock: Allow concurrent record and report To support live monitoring of kernel lock contention without BPF, it should support something like below: # perf lock record -a -o- sleep 1 | perf lock contention -i- contended total wait max wait avg wait type caller 2 10.27 us 6.17 us 5.13 us spinlock load_balance+0xc03 1 5.29 us 5.29 us 5.29 us rwlock:W ep_scan_ready_list+0x54 1 4.12 us 4.12 us 4.12 us spinlock smpboot_thread_fn+0x116 1 3.28 us 3.28 us 3.28 us mutex pipe_read+0x50 To do that, it needs to handle HEAD_ATTR, HEADER_EVENT_UPDATE and HEADER_TRACING_DATA which are generated only for the pipe mode. And setting event handler also should be delayed until it gets the event information. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221104051440.220989-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 68 +++++++++++++++++++++---------- tools/perf/tests/shell/lock_contention.sh | 15 ++++++- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6f79175365a8..0d280093b19a 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1390,6 +1390,34 @@ static int dump_info(void) return rc; } +static const struct evsel_str_handler lock_tracepoints[] = { + { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ +}; + +static const struct evsel_str_handler contention_tracepoints[] = { + { "lock:contention_begin", evsel__process_contention_begin, }, + { "lock:contention_end", evsel__process_contention_end, }, +}; + +static int process_event_update(struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + int ret; + + ret = perf_event__process_event_update(tool, event, pevlist); + if (ret < 0) + return ret; + + /* this can return -EEXIST since we call it for each evsel */ + perf_session__set_tracepoints_handlers(session, lock_tracepoints); + perf_session__set_tracepoints_handlers(session, contention_tracepoints); + return 0; +} + typedef int (*tracepoint_handler)(struct evsel *evsel, struct perf_sample *sample); @@ -1545,28 +1573,19 @@ next: print_bad_events(bad, total); } -static const struct evsel_str_handler lock_tracepoints[] = { - { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ - { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ -}; - -static const struct evsel_str_handler contention_tracepoints[] = { - { "lock:contention_begin", evsel__process_contention_begin, }, - { "lock:contention_end", evsel__process_contention_end, }, -}; - static bool force; static int __cmd_report(bool display_info) { int err = -EINVAL; struct perf_tool eops = { + .attr = perf_event__process_attr, + .event_update = process_event_update, .sample = process_sample_event, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .namespaces = perf_event__process_namespaces, + .tracing_data = perf_event__process_tracing_data, .ordered_events = true, }; struct perf_data data = { @@ -1585,17 +1604,19 @@ static int __cmd_report(bool display_info) symbol_conf.sort_by_name = true; symbol__init(&session->header.env); - if (!perf_session__has_traces(session, "lock record")) - goto out_delete; + if (!data.is_pipe) { + if (!perf_session__has_traces(session, "lock record")) + goto out_delete; - if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { - pr_err("Initializing perf session tracepoint handlers failed\n"); - goto out_delete; - } + if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } - if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { - pr_err("Initializing perf session tracepoint handlers failed\n"); - goto out_delete; + if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } } if (setup_output_field(false, output_fields)) @@ -1633,9 +1654,12 @@ static int __cmd_contention(int argc, const char **argv) { int err = -EINVAL; struct perf_tool eops = { + .attr = perf_event__process_attr, + .event_update = process_event_update, .sample = process_sample_event, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, + .tracing_data = perf_event__process_tracing_data, .ordered_events = true, }; struct perf_data data = { @@ -1698,7 +1722,7 @@ static int __cmd_contention(int argc, const char **argv) pr_err("lock contention BPF setup failed\n"); goto out_delete; } - } else { + } else if (!data.is_pipe) { if (!perf_session__has_traces(session, "lock record")) goto out_delete; diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 04bf604e3c6f..f7bd0d8eb5c3 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -53,7 +53,7 @@ test_bpf() if ! perf lock con -b true > /dev/null 2>&1 ; then echo "[Skip] No BPF support" - exit + return fi # the perf lock contention output goes to the stderr @@ -65,9 +65,22 @@ test_bpf() fi } +test_record_concurrent() +{ + echo "Testing perf lock record and perf lock contention at the same time" + perf lock record -o- -- perf bench sched messaging 2> /dev/null | \ + perf lock contention -i- -E 1 -q 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + err=1 + exit + fi +} + check test_record test_bpf +test_record_concurrent exit ${err} -- cgit v1.2.3 From 9d895e46842908aa49a042e699097df64ab20b7f Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Wed, 9 Nov 2022 11:39:32 +0100 Subject: perf data: Add tracepoint fields when converting to JSON When converting recorded data into JSON format, perf data omits probe variables. Add them to the output in the format "field name": "field value" using tep_print_field: $ perf data convert --to-json output.json // output.json { "linux-perf-json-version": 1, "headers": { ... }, "samples": [ { "timestamp": 29182079082999, "pid": 309194, [...] "__probe_ip": "0x93ee35", "query_string_string": "select 2;", "nxids": "0" } ] } Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com> Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Link: https://lore.kernel.org/r/20221109103932.65675-1-9erthalion6@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-json.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 613d6ae82663..57db59068cb6 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -217,6 +217,26 @@ static int process_sample_event(struct perf_tool *tool, } output_json_format(out, false, 3, "]"); + if (sample->raw_data) { + int i; + struct tep_format_field **fields; + + fields = tep_event_fields(evsel->tp_format); + if (fields) { + i = 0; + while (fields[i]) { + struct trace_seq s; + + trace_seq_init(&s); + tep_print_field(&s, sample->raw_data, fields[i]); + output_json_key_string(out, true, 3, fields[i]->name, s.buffer); + + i++; + } + free(fields); + } + } + output_json_format(out, false, 2, "}"); return 0; } -- cgit v1.2.3 From cf9f67b36303de65596ae7504a2a7573c08876bb Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Sat, 5 Nov 2022 22:59:32 +0900 Subject: perf print-events: Remove redundant comparison with zero Since variable npmus is unsigned int, comparing with 0 is unnecessary. Signed-off-by: Kang Minchul Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221105135932.81612-1-tegongkang@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index c4d5d87fae2f..6df947df1c0f 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -311,10 +311,8 @@ restart: if ((hybrid_supported == 0) || (hybrid_supported == npmus)) { evt_list[evt_i] = strdup(name); - if (npmus > 0) { - for (j = 0; j < npmus; j++) - zfree(&evt_pmus[j]); - } + for (j = 0; j < npmus; j++) + zfree(&evt_pmus[j]); } else { for (j = 0; j < hybrid_supported; j++) { evt_list[evt_i++] = evt_pmus[j]; -- cgit v1.2.3 From 612a5337ae7a87893f90de7878b20701b2b17d7d Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 20 Oct 2022 14:45:11 +0100 Subject: perf vendor events: Add Arm Neoverse V2 PMU events Rename the neoverse-n2 folder to make it clear that it includes V2, and add V2 to mapfile.csv. V2 has the same events as N2, visible by running the following command in the ARM-software/data github repo [1]: diff pmu/neoverse-v2.json pmu/neoverse-n2.json | grep code Testing: $ perf test pmu 10: PMU events : 10.1: PMU event table sanity : Ok 10.2: PMU event map aliases : Ok 10.3: Parsing of PMU event table metrics : Ok 10.4: Parsing of PMU event table metrics with fake PMUs : Ok [1]: https://github.com/ARM-software/data Reviewed-by: Nick Forrington Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20221020134512.1345013-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/neoverse-n2-v2/branch.json | 8 ++ .../arch/arm64/arm/neoverse-n2-v2/bus.json | 20 +++ .../arch/arm64/arm/neoverse-n2-v2/cache.json | 155 +++++++++++++++++++++ .../arch/arm64/arm/neoverse-n2-v2/exception.json | 47 +++++++ .../arch/arm64/arm/neoverse-n2-v2/instruction.json | 143 +++++++++++++++++++ .../arch/arm64/arm/neoverse-n2-v2/memory.json | 41 ++++++ .../arch/arm64/arm/neoverse-n2-v2/pipeline.json | 23 +++ .../arch/arm64/arm/neoverse-n2-v2/spe.json | 14 ++ .../arch/arm64/arm/neoverse-n2-v2/trace.json | 29 ++++ .../arch/arm64/arm/neoverse-n2/branch.json | 8 -- .../pmu-events/arch/arm64/arm/neoverse-n2/bus.json | 20 --- .../arch/arm64/arm/neoverse-n2/cache.json | 155 --------------------- .../arch/arm64/arm/neoverse-n2/exception.json | 47 ------- .../arch/arm64/arm/neoverse-n2/instruction.json | 143 ------------------- .../arch/arm64/arm/neoverse-n2/memory.json | 41 ------ .../arch/arm64/arm/neoverse-n2/pipeline.json | 23 --- .../pmu-events/arch/arm64/arm/neoverse-n2/spe.json | 14 -- .../arch/arm64/arm/neoverse-n2/trace.json | 29 ---- tools/perf/pmu-events/arch/arm64/mapfile.csv | 3 +- 19 files changed, 482 insertions(+), 481 deletions(-) create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json delete mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json new file mode 100644 index 000000000000..79f2016c53b0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json new file mode 100644 index 000000000000..579c1c993d17 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json new file mode 100644 index 000000000000..0141f749bff3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json @@ -0,0 +1,155 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L3D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L3D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L3D_CACHE" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "LL_CACHE_RD" + }, + { + "ArchStdEvent": "LL_CACHE_MISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L3D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json new file mode 100644 index 000000000000..344a2d552ad5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json new file mode 100644 index 000000000000..e57cd55937c6 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json @@ -0,0 +1,143 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "ASE_INST_SPEC" + }, + { + "ArchStdEvent": "SVE_INST_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_EMPTY_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_FAULT_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT8_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT16_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT32_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT64_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json new file mode 100644 index 000000000000..7b2b21ac150f --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "REMOTE_ACCESS" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json new file mode 100644 index 000000000000..f9fae15f7555 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json new file mode 100644 index 000000000000..3116135c59e2 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "TRB_WRAP" + }, + { + "ArchStdEvent": "TRCEXTOUT0" + }, + { + "ArchStdEvent": "TRCEXTOUT1" + }, + { + "ArchStdEvent": "TRCEXTOUT2" + }, + { + "ArchStdEvent": "TRCEXTOUT3" + }, + { + "ArchStdEvent": "CTI_TRIGOUT4" + }, + { + "ArchStdEvent": "CTI_TRIGOUT5" + }, + { + "ArchStdEvent": "CTI_TRIGOUT6" + }, + { + "ArchStdEvent": "CTI_TRIGOUT7" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json deleted file mode 100644 index 79f2016c53b0..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "ArchStdEvent": "BR_MIS_PRED" - }, - { - "ArchStdEvent": "BR_PRED" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json deleted file mode 100644 index 579c1c993d17..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "ArchStdEvent": "CPU_CYCLES" - }, - { - "ArchStdEvent": "BUS_ACCESS" - }, - { - "ArchStdEvent": "BUS_CYCLES" - }, - { - "ArchStdEvent": "BUS_ACCESS_RD" - }, - { - "ArchStdEvent": "BUS_ACCESS_WR" - }, - { - "ArchStdEvent": "CNT_CYCLES" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json deleted file mode 100644 index 0141f749bff3..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json +++ /dev/null @@ -1,155 +0,0 @@ -[ - { - "ArchStdEvent": "L1I_CACHE_REFILL" - }, - { - "ArchStdEvent": "L1I_TLB_REFILL" - }, - { - "ArchStdEvent": "L1D_CACHE_REFILL" - }, - { - "ArchStdEvent": "L1D_CACHE" - }, - { - "ArchStdEvent": "L1D_TLB_REFILL" - }, - { - "ArchStdEvent": "L1I_CACHE" - }, - { - "ArchStdEvent": "L1D_CACHE_WB" - }, - { - "ArchStdEvent": "L2D_CACHE" - }, - { - "ArchStdEvent": "L2D_CACHE_REFILL" - }, - { - "ArchStdEvent": "L2D_CACHE_WB" - }, - { - "ArchStdEvent": "L2D_CACHE_ALLOCATE" - }, - { - "ArchStdEvent": "L1D_TLB" - }, - { - "ArchStdEvent": "L1I_TLB" - }, - { - "ArchStdEvent": "L3D_CACHE_ALLOCATE" - }, - { - "ArchStdEvent": "L3D_CACHE_REFILL" - }, - { - "ArchStdEvent": "L3D_CACHE" - }, - { - "ArchStdEvent": "L2D_TLB_REFILL" - }, - { - "ArchStdEvent": "L2D_TLB" - }, - { - "ArchStdEvent": "DTLB_WALK" - }, - { - "ArchStdEvent": "ITLB_WALK" - }, - { - "ArchStdEvent": "LL_CACHE_RD" - }, - { - "ArchStdEvent": "LL_CACHE_MISS_RD" - }, - { - "ArchStdEvent": "L1D_CACHE_LMISS_RD" - }, - { - "ArchStdEvent": "L1D_CACHE_RD" - }, - { - "ArchStdEvent": "L1D_CACHE_WR" - }, - { - "ArchStdEvent": "L1D_CACHE_REFILL_RD" - }, - { - "ArchStdEvent": "L1D_CACHE_REFILL_WR" - }, - { - "ArchStdEvent": "L1D_CACHE_REFILL_INNER" - }, - { - "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" - }, - { - "ArchStdEvent": "L1D_CACHE_WB_VICTIM" - }, - { - "ArchStdEvent": "L1D_CACHE_WB_CLEAN" - }, - { - "ArchStdEvent": "L1D_CACHE_INVAL" - }, - { - "ArchStdEvent": "L1D_TLB_REFILL_RD" - }, - { - "ArchStdEvent": "L1D_TLB_REFILL_WR" - }, - { - "ArchStdEvent": "L1D_TLB_RD" - }, - { - "ArchStdEvent": "L1D_TLB_WR" - }, - { - "ArchStdEvent": "L2D_CACHE_RD" - }, - { - "ArchStdEvent": "L2D_CACHE_WR" - }, - { - "ArchStdEvent": "L2D_CACHE_REFILL_RD" - }, - { - "ArchStdEvent": "L2D_CACHE_REFILL_WR" - }, - { - "ArchStdEvent": "L2D_CACHE_WB_VICTIM" - }, - { - "ArchStdEvent": "L2D_CACHE_WB_CLEAN" - }, - { - "ArchStdEvent": "L2D_CACHE_INVAL" - }, - { - "ArchStdEvent": "L2D_TLB_REFILL_RD" - }, - { - "ArchStdEvent": "L2D_TLB_REFILL_WR" - }, - { - "ArchStdEvent": "L2D_TLB_RD" - }, - { - "ArchStdEvent": "L2D_TLB_WR" - }, - { - "ArchStdEvent": "L3D_CACHE_RD" - }, - { - "ArchStdEvent": "L1I_CACHE_LMISS" - }, - { - "ArchStdEvent": "L2D_CACHE_LMISS_RD" - }, - { - "ArchStdEvent": "L3D_CACHE_LMISS_RD" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json deleted file mode 100644 index 344a2d552ad5..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json +++ /dev/null @@ -1,47 +0,0 @@ -[ - { - "ArchStdEvent": "EXC_TAKEN" - }, - { - "ArchStdEvent": "MEMORY_ERROR" - }, - { - "ArchStdEvent": "EXC_UNDEF" - }, - { - "ArchStdEvent": "EXC_SVC" - }, - { - "ArchStdEvent": "EXC_PABORT" - }, - { - "ArchStdEvent": "EXC_DABORT" - }, - { - "ArchStdEvent": "EXC_IRQ" - }, - { - "ArchStdEvent": "EXC_FIQ" - }, - { - "ArchStdEvent": "EXC_SMC" - }, - { - "ArchStdEvent": "EXC_HVC" - }, - { - "ArchStdEvent": "EXC_TRAP_PABORT" - }, - { - "ArchStdEvent": "EXC_TRAP_DABORT" - }, - { - "ArchStdEvent": "EXC_TRAP_OTHER" - }, - { - "ArchStdEvent": "EXC_TRAP_IRQ" - }, - { - "ArchStdEvent": "EXC_TRAP_FIQ" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json deleted file mode 100644 index e57cd55937c6..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json +++ /dev/null @@ -1,143 +0,0 @@ -[ - { - "ArchStdEvent": "SW_INCR" - }, - { - "ArchStdEvent": "INST_RETIRED" - }, - { - "ArchStdEvent": "EXC_RETURN" - }, - { - "ArchStdEvent": "CID_WRITE_RETIRED" - }, - { - "ArchStdEvent": "INST_SPEC" - }, - { - "ArchStdEvent": "TTBR_WRITE_RETIRED" - }, - { - "ArchStdEvent": "BR_RETIRED" - }, - { - "ArchStdEvent": "BR_MIS_PRED_RETIRED" - }, - { - "ArchStdEvent": "OP_RETIRED" - }, - { - "ArchStdEvent": "OP_SPEC" - }, - { - "ArchStdEvent": "LDREX_SPEC" - }, - { - "ArchStdEvent": "STREX_PASS_SPEC" - }, - { - "ArchStdEvent": "STREX_FAIL_SPEC" - }, - { - "ArchStdEvent": "STREX_SPEC" - }, - { - "ArchStdEvent": "LD_SPEC" - }, - { - "ArchStdEvent": "ST_SPEC" - }, - { - "ArchStdEvent": "DP_SPEC" - }, - { - "ArchStdEvent": "ASE_SPEC" - }, - { - "ArchStdEvent": "VFP_SPEC" - }, - { - "ArchStdEvent": "PC_WRITE_SPEC" - }, - { - "ArchStdEvent": "CRYPTO_SPEC" - }, - { - "ArchStdEvent": "BR_IMMED_SPEC" - }, - { - "ArchStdEvent": "BR_RETURN_SPEC" - }, - { - "ArchStdEvent": "BR_INDIRECT_SPEC" - }, - { - "ArchStdEvent": "ISB_SPEC" - }, - { - "ArchStdEvent": "DSB_SPEC" - }, - { - "ArchStdEvent": "DMB_SPEC" - }, - { - "ArchStdEvent": "RC_LD_SPEC" - }, - { - "ArchStdEvent": "RC_ST_SPEC" - }, - { - "ArchStdEvent": "ASE_INST_SPEC" - }, - { - "ArchStdEvent": "SVE_INST_SPEC" - }, - { - "ArchStdEvent": "FP_HP_SPEC" - }, - { - "ArchStdEvent": "FP_SP_SPEC" - }, - { - "ArchStdEvent": "FP_DP_SPEC" - }, - { - "ArchStdEvent": "SVE_PRED_SPEC" - }, - { - "ArchStdEvent": "SVE_PRED_EMPTY_SPEC" - }, - { - "ArchStdEvent": "SVE_PRED_FULL_SPEC" - }, - { - "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC" - }, - { - "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC" - }, - { - "ArchStdEvent": "SVE_LDFF_SPEC" - }, - { - "ArchStdEvent": "SVE_LDFF_FAULT_SPEC" - }, - { - "ArchStdEvent": "FP_SCALE_OPS_SPEC" - }, - { - "ArchStdEvent": "FP_FIXED_OPS_SPEC" - }, - { - "ArchStdEvent": "ASE_SVE_INT8_SPEC" - }, - { - "ArchStdEvent": "ASE_SVE_INT16_SPEC" - }, - { - "ArchStdEvent": "ASE_SVE_INT32_SPEC" - }, - { - "ArchStdEvent": "ASE_SVE_INT64_SPEC" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json deleted file mode 100644 index 7b2b21ac150f..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json +++ /dev/null @@ -1,41 +0,0 @@ -[ - { - "ArchStdEvent": "MEM_ACCESS" - }, - { - "ArchStdEvent": "REMOTE_ACCESS" - }, - { - "ArchStdEvent": "MEM_ACCESS_RD" - }, - { - "ArchStdEvent": "MEM_ACCESS_WR" - }, - { - "ArchStdEvent": "UNALIGNED_LD_SPEC" - }, - { - "ArchStdEvent": "UNALIGNED_ST_SPEC" - }, - { - "ArchStdEvent": "UNALIGNED_LDST_SPEC" - }, - { - "ArchStdEvent": "LDST_ALIGN_LAT" - }, - { - "ArchStdEvent": "LD_ALIGN_LAT" - }, - { - "ArchStdEvent": "ST_ALIGN_LAT" - }, - { - "ArchStdEvent": "MEM_ACCESS_CHECKED" - }, - { - "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" - }, - { - "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json deleted file mode 100644 index f9fae15f7555..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json +++ /dev/null @@ -1,23 +0,0 @@ -[ - { - "ArchStdEvent": "STALL_FRONTEND" - }, - { - "ArchStdEvent": "STALL_BACKEND" - }, - { - "ArchStdEvent": "STALL" - }, - { - "ArchStdEvent": "STALL_SLOT_BACKEND" - }, - { - "ArchStdEvent": "STALL_SLOT_FRONTEND" - }, - { - "ArchStdEvent": "STALL_SLOT" - }, - { - "ArchStdEvent": "STALL_BACKEND_MEM" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json deleted file mode 100644 index 20f2165c85fe..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "ArchStdEvent": "SAMPLE_POP" - }, - { - "ArchStdEvent": "SAMPLE_FEED" - }, - { - "ArchStdEvent": "SAMPLE_FILTRATE" - }, - { - "ArchStdEvent": "SAMPLE_COLLISION" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json deleted file mode 100644 index 3116135c59e2..000000000000 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json +++ /dev/null @@ -1,29 +0,0 @@ -[ - { - "ArchStdEvent": "TRB_WRAP" - }, - { - "ArchStdEvent": "TRCEXTOUT0" - }, - { - "ArchStdEvent": "TRCEXTOUT1" - }, - { - "ArchStdEvent": "TRCEXTOUT2" - }, - { - "ArchStdEvent": "TRCEXTOUT3" - }, - { - "ArchStdEvent": "CTI_TRIGOUT4" - }, - { - "ArchStdEvent": "CTI_TRIGOUT5" - }, - { - "ArchStdEvent": "CTI_TRIGOUT6" - }, - { - "ArchStdEvent": "CTI_TRIGOUT7" - } -] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index ad502d00f460..f134e833c069 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -34,7 +34,8 @@ 0x00000000410fd460,v1,arm/cortex-a510,core 0x00000000410fd470,v1,arm/cortex-a710,core 0x00000000410fd480,v1,arm/cortex-x2,core -0x00000000410fd490,v1,arm/neoverse-n2,core +0x00000000410fd490,v1,arm/neoverse-n2-v2,core +0x00000000410fd4f0,v1,arm/neoverse-n2-v2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core -- cgit v1.2.3 From 4ea0be1f0db588afdb2d4d94dfa921b01f34a3c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:07 -0800 Subject: perf stat: Increase metric length to align outputs When perf stat is called with very detailed events, the output doesn't align well like below: $ sudo perf stat -a -ddd sleep 1 Performance counter stats for 'system wide': 8,020.23 msec cpu-clock # 7.997 CPUs utilized 3,970 context-switches # 494.998 /sec 169 cpu-migrations # 21.072 /sec 586 page-faults # 73.065 /sec 649,568,060 cycles # 0.081 GHz (30.42%) 304,044,345 instructions # 0.47 insn per cycle (38.40%) 60,313,022 branches # 7.520 M/sec (38.89%) 2,766,919 branch-misses # 4.59% of all branches (39.26%) 74,422,951 L1-dcache-loads # 9.279 M/sec (39.39%) 8,025,568 L1-dcache-load-misses # 10.78% of all L1-dcache accesses (39.22%) 3,314,995 LLC-loads # 413.329 K/sec (30.83%) 1,225,619 LLC-load-misses # 36.97% of all LL-cache accesses (30.45%) L1-icache-loads 20,420,493 L1-icache-load-misses # 0.00% of all L1-icache accesses (30.29%) 58,017,947 dTLB-loads # 7.234 M/sec (30.37%) 704,677 dTLB-load-misses # 1.21% of all dTLB cache accesses (30.27%) 234,225 iTLB-loads # 29.204 K/sec (30.29%) 417,166 iTLB-load-misses # 178.10% of all iTLB cache accesses (30.32%) L1-dcache-prefetches L1-dcache-prefetch-misses 1.002947355 seconds time elapsed Increase the METRIC_LEN by 3 so that it can align properly. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 657434cd29ee..576283afa319 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -218,7 +218,7 @@ struct outstate { struct evsel *evsel; }; -#define METRIC_LEN 35 +#define METRIC_LEN 38 static void new_line_std(struct perf_stat_config *config __maybe_unused, void *ctx) -- cgit v1.2.3 From 81a02c6577ecfee7056ccafbd028984d0d670c0c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:08 -0800 Subject: perf stat: Clear screen only if output file is a tty The --interval-clear option makes perf stat to clear the terminal at each interval. But it doesn't need to clear the screen when it saves to a file. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 576283afa319..ccb804546d5a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -890,7 +890,7 @@ static void print_interval(struct perf_stat_config *config, FILE *output = config->output; static int num_print_interval; - if (config->interval_clear) + if (config->interval_clear && isatty(fileno(output))) puts(CONSOLE_CLEAR); if (!config->iostat_run && !config->json_output) -- cgit v1.2.3 From f4e55f88da923f39f0b76edc3da3c52d0b72d429 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:09 -0800 Subject: perf stat: Move common code in print_metric_headers() The struct perf_stat_output_ctx is set in a loop with the same values. Move the code out of the loop and keep the loop minimal. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ccb804546d5a..173f4715189c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -835,11 +835,16 @@ static void print_metric_headers(struct perf_stat_config *config, struct evlist *evlist, const char *prefix, bool no_indent) { - struct perf_stat_output_ctx out; struct evsel *counter; struct outstate os = { .fh = config->output }; + struct perf_stat_output_ctx out = { + .ctx = &os, + .print_metric = print_metric_header, + .new_line = new_line_metric, + .force_header = true, + }; bool first = true; if (config->json_output && !config->interval) @@ -863,13 +868,11 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { os.evsel = counter; - out.ctx = &os; - out.print_metric = print_metric_header; + if (!first && config->json_output) fprintf(config->output, ", "); first = false; - out.new_line = new_line_metric; - out.force_header = true; + perf_stat__print_shadow_stats(config, counter, 0, 0, &out, -- cgit v1.2.3 From fdc7d6082459aa6705dd39b827214f6cfc1fa054 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:10 -0800 Subject: perf stat: Fix --metric-only --json output Currently it prints all metric headers for JSON output. But actually it skips some metrics with valid_only_metric(). So the output looks like: $ perf stat --metric-only --json true {"unit" : "CPUs utilized", "unit" : "/sec", "unit" : "/sec", "unit" : "/sec", "unit" : "GHz", "unit" : "insn per cycle", "unit" : "/sec", "unit" : "branch-misses of all branches"} {"metric-value" : "3.861"}{"metric-value" : "0.79"}{"metric-value" : "3.04"} As you can see there are 8 units in the header but only 3 metric-values are there. It should skip the unused headers as well. Also each unit should be printed as a separate object like metric values. With this patch: $ perf stat --metric-only --json true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {"metric-value" : "4.166"}{"metric-value" : "0.73"}{"metric-value" : "2.96"} Fixes: df936cadfb58ba93 ("perf stat: Add JSON output option") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Claire Jensen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 173f4715189c..5c10cf49fd12 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -430,12 +430,12 @@ static void print_metric_header(struct perf_stat_config *config, os->evsel->priv != os->evsel->evlist->selected->priv) return; - if (!valid_only_metric(unit) && !config->json_output) + if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) - fprintf(os->fh, "\"unit\" : \"%s\"", unit); + fprintf(os->fh, "{\"unit\" : \"%s\"}", unit); else if (config->csv_output) fprintf(os->fh, "%s%s", unit, config->csv_sep); else @@ -845,10 +845,6 @@ static void print_metric_headers(struct perf_stat_config *config, .new_line = new_line_metric, .force_header = true, }; - bool first = true; - - if (config->json_output && !config->interval) - fprintf(config->output, "{"); if (prefix && !config->json_output) fprintf(config->output, "%s", prefix); @@ -869,18 +865,12 @@ static void print_metric_headers(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { os.evsel = counter; - if (!first && config->json_output) - fprintf(config->output, ", "); - first = false; - perf_stat__print_shadow_stats(config, counter, 0, 0, &out, &config->metric_events, &rt_stat); } - if (config->json_output) - fprintf(config->output, "}"); fputc('\n', config->output); } @@ -952,14 +942,8 @@ static void print_interval(struct perf_stat_config *config, } } - if ((num_print_interval == 0 || config->interval_clear) - && metric_only && !config->json_output) + if ((num_print_interval == 0 || config->interval_clear) && metric_only) print_metric_headers(config, evlist, " ", true); - if ((num_print_interval == 0 || config->interval_clear) - && metric_only && config->json_output) { - fprintf(output, "{"); - print_metric_headers(config, evlist, " ", true); - } if (++num_print_interval == 25) num_print_interval = 0; } -- cgit v1.2.3 From 6d0a7e394eabd456f70c2f2f8e958b2343074360 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:11 -0800 Subject: perf stat: Do not indent headers for JSON Currently --metric-only with --json indents header lines. This is not needed for JSON. $ perf stat -aA --metric-only -j true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {"cpu" : "0", {"metric-value" : "0.101"}{"metric-value" : "0.86"}{"metric-value" : "1.91"} {"cpu" : "1", {"metric-value" : "0.102"}{"metric-value" : "0.87"}{"metric-value" : "2.02"} {"cpu" : "2", {"metric-value" : "0.085"}{"metric-value" : "1.02"}{"metric-value" : "1.69"} ... Note that the other lines are broken JSON, but it will be handled later. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5c10cf49fd12..99bc0db8f068 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -849,7 +849,7 @@ static void print_metric_headers(struct perf_stat_config *config, if (prefix && !config->json_output) fprintf(config->output, "%s", prefix); - if (!config->csv_output && !no_indent) + if (!config->csv_output && !config->json_output && !no_indent) fprintf(config->output, "%*s", aggr_header_lens[config->aggr_mode], ""); if (config->csv_output) { -- cgit v1.2.3 From 1cc7642abba7b281ecd836bfb56fc6dedac32555 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:12 -0800 Subject: perf stat: Add header for interval in JSON output It missed to print a matching header line for intervals. Before: # perf stat -a -e cycles,instructions --metric-only -j -I 500 {"unit" : "insn per cycle"} {"interval" : 0.500544283}{"metric-value" : "1.96"} ^C After: # perf stat -a -e cycles,instructions --metric-only -j -I 500 {"unit" : "sec"}{"unit" : "insn per cycle"} {"interval" : 0.500515681}{"metric-value" : "2.31"} ^C Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 99bc0db8f068..2a08c66121ec 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -858,6 +858,10 @@ static void print_metric_headers(struct perf_stat_config *config, if (!config->iostat_run) fputs(aggr_header_csv[config->aggr_mode], config->output); } + if (config->json_output) { + if (config->interval) + fputs("{\"unit\" : \"sec\"}", config->output); + } if (config->iostat_run) iostat_print_header_prefix(config); -- cgit v1.2.3 From f1db5a1d1d4de248534567fd95efa570bc693f73 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:13 -0800 Subject: perf stat: Fix condition in print_interval() The num_print_interval and config->interval_clear should be checked together like other places like later in the function. Otherwise, the --interval-clear option could print the headers for the CSV or JSON output unnecessarily. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 2a08c66121ec..7b2ec400813e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -900,8 +900,8 @@ static void print_interval(struct perf_stat_config *config, sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) ts->tv_sec, ts->tv_nsec); - if ((num_print_interval == 0 && !config->csv_output && !config->json_output) - || config->interval_clear) { + if ((num_print_interval == 0 || config->interval_clear) && + !config->csv_output && !config->json_output) { switch (config->aggr_mode) { case AGGR_NODE: fprintf(output, "# time node cpus"); -- cgit v1.2.3 From 20e2e31779377b36ada04c06d572ce0b4e234bb1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Nov 2022 13:33:14 -0800 Subject: perf stat: Consolidate condition to print metrics The pm variable holds an appropriate function to print metrics for CSV anf JSON already. So we can combine the if statement to simplify the code a little bit. This also matches to the above condition for non-CSV and non-JSON case. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221107213314.3239159-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7b2ec400813e..cb2a116bded6 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -600,9 +600,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm(config, &os, NULL, NULL, "", 0); print_noise(config, counter, noise); print_running(config, run, ena); - if (config->csv_output) - pm(config, &os, NULL, NULL, "", 0); - else if (config->json_output) + if (config->csv_output || config->json_output) pm(config, &os, NULL, NULL, "", 0); return; } -- cgit v1.2.3 From a80e0e156ca6d7c339e314d15db2f038755da6c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Nov 2022 19:22:43 -0800 Subject: perf stat: Fix summary output in CSV with --metric-only It should not print "summary" for each event when --metric-only is set. Before: $ sudo perf stat -a --per-socket --summary -x, --metric-only true time,socket,cpusGhz,insn per cycle,branch-misses of all branches, 0.000709079,S0,8,0.893,2.40,0.45, S0,8, summary, summary, summary, summary, summary,0.893, summary,2.40, summary, summary,0.45, After: $ sudo perf stat -a --per-socket --summary -x, --metric-only true time,socket,cpusGHz,insn per cycle,branch-misses of all branches, 0.000882297,S0,8,0.598,1.64,0.64, summary,S0,8,0.598,1.64,0.64, Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221112032244.1077370-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cc206781c57c..6ea731403270 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -549,7 +549,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } if (!config->no_csv_summary && config->csv_output && - config->summary && !config->interval) { + config->summary && !config->interval && !config->metric_only) { fprintf(config->output, "%16s%s", "summary", config->csv_sep); } @@ -732,8 +732,13 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - if (prefix && metric_only) - fprintf(output, "%s", prefix); + if (metric_only) { + if (prefix) + fprintf(output, "%s", prefix); + else if (config->summary && !config->no_csv_summary && + config->csv_output && !config->interval) + fprintf(output, "%16s%s", "summary", config->csv_sep); + } first = true; evlist__for_each_entry(evlist, counter) { -- cgit v1.2.3 From 7565f9617efac0c0c8e2dbd08dbe0695d56684f5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Nov 2022 19:22:44 -0800 Subject: perf stat: Add missing separator in the CSV header It should have a comma after 'cpus' for socket and die aggregation mode. The output of the following command shows the issue. $ sudo perf stat -a --per-socket -x, --metric-only -I1 true Before: +--- here V time,socket,cpusGhz,insn per cycle,branch-misses of all branches, 0.000908461,S0,8,0.950,1.65,1.21, After: time,socket,cpus,GHz,insn per cycle,branch-misses of all branches, 0.000683094,S0,8,0.593,2.00,0.60, Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221112032244.1077370-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 6ea731403270..2a3c1e0098b9 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -828,8 +828,8 @@ static int aggr_header_lens[] = { static const char *aggr_header_csv[] = { [AGGR_CORE] = "core,cpus,", - [AGGR_DIE] = "die,cpus", - [AGGR_SOCKET] = "socket,cpus", + [AGGR_DIE] = "die,cpus,", + [AGGR_SOCKET] = "socket,cpus,", [AGGR_NONE] = "cpu,", [AGGR_THREAD] = "comm-pid,", [AGGR_NODE] = "node,", -- cgit v1.2.3 From e5f4afbe395f1248e7501d470118a2a947fe87e6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:14 -0800 Subject: perf pmu: Remove mostly unused 'struct perf_pmu' 'is_hybrid' member Replace usage with perf_pmu__is_hybrid(). Suggested-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +---- tools/perf/util/evsel.h | 2 +- tools/perf/util/pmu.c | 3 +-- tools/perf/util/pmu.h | 1 - tools/perf/util/stat.c | 11 +++-------- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cdde5b5f8ad2..ca6abb64c91d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3124,11 +3124,8 @@ void evsel__zero_per_pkg(struct evsel *evsel) } } -bool evsel__is_hybrid(struct evsel *evsel) +bool evsel__is_hybrid(const struct evsel *evsel) { - if (evsel->pmu) - return evsel->pmu->is_hybrid; - return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 989865e16aad..467bb0b32fef 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -498,7 +498,7 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); void evsel__zero_per_pkg(struct evsel *evsel); -bool evsel__is_hybrid(struct evsel *evsel); +bool evsel__is_hybrid(const struct evsel *evsel); struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6a86e6af0903..48e7be6f3baa 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -980,7 +980,6 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->is_hybrid = is_hybrid; pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); @@ -992,7 +991,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); - if (pmu->is_hybrid) + if (is_hybrid) list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 68e15c38ae71..0d556d02ce52 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -40,7 +40,6 @@ struct perf_pmu { __u32 type; bool selectable; bool is_uncore; - bool is_hybrid; bool auxtrace; int max_precise; struct perf_event_attr *default_config; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 3a432a949d46..acf0edf5fdd1 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -604,15 +604,10 @@ static void evsel__merge_aliases(struct evsel *evsel) } } -static bool evsel__should_merge_hybrid(struct evsel *evsel, struct perf_stat_config *config) +static bool evsel__should_merge_hybrid(const struct evsel *evsel, + const struct perf_stat_config *config) { - struct perf_pmu *pmu; - - if (!config->hybrid_merge) - return false; - - pmu = evsel__find_pmu(evsel); - return pmu && pmu->is_hybrid; + return config->hybrid_merge && evsel__is_hybrid(evsel); } static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) -- cgit v1.2.3 From fe13d43d07393f46957c2fc09f09f097f7969a5d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:15 -0800 Subject: perf pmu: Add data structure documentation Add documentation to 'struct perf_pmu' and the associated structs of 'perf_pmu_alias' and 'perf_pmu_format'. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 16 +++++++ tools/perf/util/pmu.h | 122 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 132 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 48e7be6f3baa..057e1528c32f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -31,10 +31,26 @@ struct perf_pmu perf_pmu__fake; +/** + * struct perf_pmu_format - Values from a format file read from + * /devices/cpu/format/ held in struct perf_pmu. + * + * For example, the contents of /devices/cpu/format/event may be + * "config:0-7" and will be represented here as name="event", + * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set. + */ struct perf_pmu_format { + /** @name: The modifier/file name. */ char *name; + /** + * @value : Which config value the format relates to. Supported values + * are from PERF_PMU_FORMAT_VALUE_CONFIG to + * PERF_PMU_FORMAT_VALUE_CONFIG_END. + */ int value; + /** @bits: Which config bits are set by this format value. */ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + /** @list: Element on list within struct perf_pmu. */ struct list_head list; }; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 0d556d02ce52..ee02e1ef9187 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -33,30 +33,101 @@ struct perf_pmu_caps { struct list_head list; }; +/** + * struct perf_pmu - hi + */ struct perf_pmu { + /** @name: The name of the PMU such as "cpu". */ char *name; + /** + * @alias_name: Optional alternate name for the PMU determined in + * architecture specific code. + */ char *alias_name; + /** + * @id: Optional PMU identifier read from + * /bus/event_source/devices//identifier. + */ char *id; + /** + * @type: Perf event attributed type value, read from + * /bus/event_source/devices//type. + */ __u32 type; + /** + * @selectable: Can the PMU name be selected as if it were an event? + */ bool selectable; + /** + * @is_uncore: Is the PMU not within the CPU core? Determined by the + * presence of /bus/event_source/devices//cpumask. + */ bool is_uncore; + /** + * @auxtrace: Are events auxiliary events? Determined in architecture + * specific code. + */ bool auxtrace; + /** + * @max_precise: Number of levels of :ppp precision supported by the + * PMU, read from + * /bus/event_source/devices//caps/max_precise. + */ int max_precise; + /** + * @default_config: Optional default perf_event_attr determined in + * architecture specific code. + */ struct perf_event_attr *default_config; + /** + * @cpus: Empty or the contents of either of: + * /bus/event_source/devices//cpumask. + * /bus/event_source/devices//cpus. + */ struct perf_cpu_map *cpus; - struct list_head format; /* HEAD struct perf_pmu_format -> list */ - struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + /** + * @format: Holds the contents of files read from + * /bus/event_source/devices//format/. The contents specify + * which event parameter changes what config, config1 or config2 bits. + */ + struct list_head format; + /** + * @aliases: List of struct perf_pmu_alias. Each alias corresponds to an + * event read from /bus/event_source/devices//events/ or + * from json events in pmu-events.c. + */ + struct list_head aliases; + /** @caps_initialized: Has the list caps been initialized? */ bool caps_initialized; + /** @nr_caps: The length of the list caps. */ u32 nr_caps; - struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ - struct list_head list; /* ELEM */ + /** + * @caps: Holds the contents of files read from + * /bus/event_source/devices//caps/. + * + * The contents are pairs of the filename with the value of its + * contents, for example, max_precise (see above) may have a value of 3. + */ + struct list_head caps; + /** @list: Element on pmus list in pmu.c. */ + struct list_head list; + /** @hybrid_list: Element on perf_pmu__hybrid_pmus. */ struct list_head hybrid_list; + /** + * @missing_features: Features to inhibit when events on this PMU are + * opened. + */ struct { + /** + * @exclude_guest: Disables perf_event_attr exclude_guest and + * exclude_host. + */ bool exclude_guest; } missing_features; }; +/** @perf_pmu__fake: A special global PMU used for testing. */ extern struct perf_pmu perf_pmu__fake; struct perf_pmu_info { @@ -70,21 +141,60 @@ struct perf_pmu_info { #define UNIT_MAX_LEN 31 /* max length for event unit name */ +/** + * struct perf_pmu_alias - An event either read from sysfs or builtin in + * pmu-events.c, created by parsing the pmu-events json files. + */ struct perf_pmu_alias { + /** @name: Name of the event like "mem-loads". */ char *name; + /** @desc: Optional short description of the event. */ char *desc; + /** @long_desc: Optional long description. */ char *long_desc; + /** + * @topic: Optional topic such as cache or pipeline, particularly for + * json events. + */ char *topic; + /** + * @str: Comma separated parameter list like + * "event=0xcd,umask=0x1,ldlat=0x3". + */ char *str; - struct list_head terms; /* HEAD struct parse_events_term -> list */ - struct list_head list; /* ELEM */ + /** @terms: Owned list of the original parsed parameters. */ + struct list_head terms; + /** @list: List element of struct perf_pmu aliases. */ + struct list_head list; + /** @unit: Units for the event, such as bytes or cache lines. */ char unit[UNIT_MAX_LEN+1]; + /** @scale: Value to scale read counter values by. */ double scale; + /** + * @per_pkg: Does the file + * /bus/event_source/devices//events/.per-pkg or + * equivalent json value exist and have the value 1. + */ bool per_pkg; + /** + * @snapshot: Does the file + * /bus/event_source/devices//events/.snapshot + * exist and have the value 1. + */ bool snapshot; + /** + * @deprecated: Is the event hidden and so not shown in perf list by + * default. + */ bool deprecated; + /** + * @metric_expr: A metric expression associated with an event. Doing + * this makes little sense due to scale and unit applying to both. + */ char *metric_expr; + /** @metric_name: A name for the metric. unit applying to both. */ char *metric_name; + /** @pmu_name: The name copied from struct perf_pmu. */ char *pmu_name; }; -- cgit v1.2.3 From 1504b6f97bad166b484d6f27dc99746fdca5f467 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:16 -0800 Subject: tools lib api fs tracing_path: Add scandir alphasort tracing_events__opendir() allows iteration over files in /tracing/events but with an arbitrary sort order. Add a scandir alternative where the results are alphabetically sorted. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/tracing_path.c | 16 ++++++++++++++++ tools/lib/api/fs/tracing_path.h | 1 + 2 files changed, 17 insertions(+) diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 5afb11b30fca..b8e457c841ab 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -113,6 +113,22 @@ DIR *tracing_events__opendir(void) return dir; } +int tracing_events__scandir_alphasort(struct dirent ***namelist) +{ + char *path = get_tracing_file("events"); + int ret; + + if (!path) { + *namelist = NULL; + return 0; + } + + ret = scandir(path, namelist, NULL, alphasort); + put_events_file(path); + + return ret; +} + int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) { diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h index a19136b086dc..fc6347c11deb 100644 --- a/tools/lib/api/fs/tracing_path.h +++ b/tools/lib/api/fs/tracing_path.h @@ -6,6 +6,7 @@ #include DIR *tracing_events__opendir(void); +int tracing_events__scandir_alphasort(struct dirent ***namelist); void tracing_path_set(const char *mountpoint); const char *tracing_path_mount(void); -- cgit v1.2.3 From d74060c03368063c64e63004319e2a39930c4297 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:17 -0800 Subject: perf tracepoint: Sort events in iterator In print_tracepoint_events() use tracing_events__scandir_alphasort() and scandir alphasort so that the subsystem and events are sorted and don't need a secondary qsort. Locally this results in the following change: ... ext4:ext4_zero_range [Tracepoint event] - fib6:fib6_table_lookup [Tracepoint event] fib:fib_table_lookup [Tracepoint event] + fib6:fib6_table_lookup [Tracepoint event] filelock:break_lease_block [Tracepoint event] ... ie fib6 now is after fib and not before it. This is more consistent with how numbers are more generally sorted, such as: ... syscalls:sys_enter_renameat [Tracepoint event] syscalls:sys_enter_renameat2 [Tracepoint event] ... and so an improvement over the qsort approach. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 108 ++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 71 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 6df947df1c0f..dae927250826 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -66,26 +66,21 @@ static int cmp_string(const void *a, const void *b) void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only) { - DIR *sys_dir, *evt_dir; - struct dirent *sys_dirent, *evt_dirent; - char evt_path[MAXPATHLEN]; - char *dir_path; - char **evt_list = NULL; - unsigned int evt_i = 0, evt_num = 0; - bool evt_num_known = false; - -restart: - sys_dir = tracing_events__opendir(); - if (!sys_dir) - return; - - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_close_sys_dir; - } + struct dirent **sys_namelist = NULL; + bool printed = false; + int sys_items = tracing_events__scandir_alphasort(&sys_namelist); + + for (int i = 0; i < sys_items; i++) { + struct dirent *sys_dirent = sys_namelist[i]; + struct dirent **evt_namelist = NULL; + char *dir_path; + int evt_items; + + if (sys_dirent->d_type != DT_DIR || + !strcmp(sys_dirent->d_name, ".") || + !strcmp(sys_dirent->d_name, "..")) + continue; - for_each_subsystem(sys_dir, sys_dirent) { if (subsys_glob != NULL && !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; @@ -93,69 +88,40 @@ restart: dir_path = get_events_file(sys_dirent->d_name); if (!dir_path) continue; - evt_dir = opendir(dir_path); - if (!evt_dir) - goto next; - for_each_event(dir_path, evt_dir, evt_dirent) { - if (event_glob != NULL && - !strglobmatch(evt_dirent->d_name, event_glob)) + evt_items = scandir(dir_path, &evt_namelist, NULL, alphasort); + for (int j = 0; j < evt_items; j++) { + struct dirent *evt_dirent = evt_namelist[j]; + char evt_path[MAXPATHLEN]; + + if (evt_dirent->d_type != DT_DIR || + !strcmp(evt_dirent->d_name, ".") || + !strcmp(evt_dirent->d_name, "..")) continue; - if (!evt_num_known) { - evt_num++; + if (tp_event_has_id(dir_path, evt_dirent) != 0) + continue; + + if (event_glob != NULL && + !strglobmatch(evt_dirent->d_name, event_glob)) continue; - } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); - - evt_list[evt_i] = strdup(evt_path); - if (evt_list[evt_i] == NULL) { - put_events_file(dir_path); - goto out_close_evt_dir; + if (name_only) + printf("%s ", evt_path); + else { + printf(" %-50s [%s]\n", evt_path, + event_type_descriptors[PERF_TYPE_TRACEPOINT]); } - evt_i++; + printed = true; } - closedir(evt_dir); -next: - put_events_file(dir_path); + free(dir_path); + free(evt_namelist); } - closedir(sys_dir); - - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { - if (name_only) { - printf("%s ", evt_list[evt_i++]); - continue; - } - printf(" %-50s [%s]\n", evt_list[evt_i++], - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - } - if (evt_num && pager_in_use()) + free(sys_namelist); + if (printed && pager_in_use()) printf("\n"); - -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_close_evt_dir: - closedir(evt_dir); -out_close_sys_dir: - closedir(sys_dir); - - printf("FATAL: not enough memory to print %s\n", - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - if (evt_list) - goto out_free; } void print_sdt_events(const char *subsys_glob, const char *event_glob, -- cgit v1.2.3 From ca0fe62413f3f536201d788672cd6cf7c4ed52dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:18 -0800 Subject: perf list: Generalize limiting to a PMU name Deprecate the --cputype option and add a --unit option where '--unit cpu_atom' behaves like '--cputype atom'. The --unit option can be used with arbitrary PMUs, for example: ``` $ perf list --unit msr pmu List of pre-defined events (to be used in -e or -M): msr/aperf/ [Kernel PMU event] msr/cpu_thermal_margin/ [Kernel PMU event] msr/mperf/ [Kernel PMU event] msr/pperf/ [Kernel PMU event] msr/smi/ [Kernel PMU event] msr/tsc/ [Kernel PMU event] ``` Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 6 +++--- tools/perf/builtin-list.c | 18 ++++++++++++------ tools/perf/util/metricgroup.c | 3 ++- tools/perf/util/pmu.c | 4 +--- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 57384a97c04f..44a819af573d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -39,9 +39,9 @@ any extra expressions computed by perf stat. --deprecated:: Print deprecated events. By default the deprecated events are hidden. ---cputype:: -Print events applying cpu with this type for hybrid platform -(e.g. --cputype core or --cputype atom) +--unit:: +Print PMU events and metrics limited to the specific PMU name. +(e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom) [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 58e1ec1654ef..cc84ced6da26 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -21,7 +21,6 @@ static bool desc_flag = true; static bool details_flag; -static const char *hybrid_type; int cmd_list(int argc, const char **argv) { @@ -30,6 +29,8 @@ int cmd_list(int argc, const char **argv) bool long_desc_flag = false; bool deprecated = false; char *pmu_name = NULL; + const char *hybrid_name = NULL; + const char *unit_name = NULL; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -40,9 +41,10 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &deprecated, "Print deprecated events."), - OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type", - "Print events applying cpu with this type for hybrid platform " - "(e.g. core or atom)"), + OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", + "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."), + OPT_STRING(0, "unit", &unit_name, "PMU name", + "Limit PMU or metric printing to the specified PMU."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -53,6 +55,8 @@ int cmd_list(int argc, const char **argv) }; set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + /* Hide hybrid flag for the more generic 'unit' flag. */ + set_option_flag(list_options, 0, "cputype", PARSE_OPT_HIDDEN); argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -62,8 +66,10 @@ int cmd_list(int argc, const char **argv) if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); - if (hybrid_type) { - pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type); + if (unit_name) + pmu_name = strdup(unit_name); + else if (hybrid_name) { + pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name); if (!pmu_name) pr_warning("WARNING: hybrid cputype is not supported!\n"); } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4c98ac29ee13..1943fed9b6d9 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -556,11 +556,12 @@ static int metricgroup__print_callback(const struct pmu_event *pe, void *vdata) { struct metricgroup_print_data *data = vdata; + const char *pmu = pe->pmu ?: "cpu"; if (!pe->metric_expr) return 0; - if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu)) + if (data->pmu_name && strcmp(data->pmu_name, pmu)) return 0; return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 057e1528c32f..e6790175307b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1695,10 +1695,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu_name && perf_pmu__is_hybrid(pmu->name) && - strcmp(pmu_name, pmu->name)) { + if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) continue; - } list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : -- cgit v1.2.3 From 3301b3fe9bdcfcb109e44e31160e830fe558faec Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:19 -0800 Subject: perf list: Simplify cache event printing The current code computes an array of cache names then sorts and prints them. Use a strlist to create a list of names that is sorted. Keep the hybrid names, it is unclear how to generalize it, but drop the computation of evt_pmus that is never used. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Kang Minchul Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-7-irogers@google.com [ Fixed up clash with cf9f67b36303de65 ("perf print-events: Remove redundant comparison with zero")] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 130 +++++++++-------------------------------- 1 file changed, 27 insertions(+), 103 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index dae927250826..ff7793944246 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -206,135 +206,59 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; - char name[64], new_name[128]; - char **evt_list = NULL, **evt_pmus = NULL; - bool evt_num_known = false; - struct perf_pmu *pmu = NULL; - - if (perf_pmu__has_hybrid()) { - npmus = perf_pmu__hybrid_pmu_num(); - evt_pmus = zalloc(sizeof(char *) * npmus); - if (!evt_pmus) - goto out_enomem; - } + struct strlist *evt_name_list = strlist__new(NULL, NULL); + struct str_node *nd; -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; + if (!evt_name_list) { + pr_debug("Failed to allocate new strlist for hwcache events\n"); + return -ENOMEM; } - - for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ if (!evsel__is_cache_op_valid(type, op)) continue; - for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - unsigned int hybrid_supported = 0, j; - bool supported; + for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + struct perf_pmu *pmu = NULL; + char name[64]; __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; if (!perf_pmu__has_hybrid()) { - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) { - continue; - } - } else { - perf_pmu__for_each_hybrid_pmu(pmu) { - if (!evt_num_known) { - evt_num++; - continue; - } - - supported = is_event_supported( - PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16) | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); - if (supported) { - snprintf(new_name, sizeof(new_name), - "%s/%s/", pmu->name, name); - evt_pmus[hybrid_supported] = - strdup(new_name); - hybrid_supported++; - } - } - - if (hybrid_supported == 0) - continue; - } - - if (!evt_num_known) { - evt_num++; + if (is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) + strlist__add(evt_name_list, name); continue; } - - if ((hybrid_supported == 0) || - (hybrid_supported == npmus)) { - evt_list[evt_i] = strdup(name); - for (j = 0; j < npmus; j++) - zfree(&evt_pmus[j]); - } else { - for (j = 0; j < hybrid_supported; j++) { - evt_list[evt_i++] = evt_pmus[j]; - evt_pmus[j] = NULL; + perf_pmu__for_each_hybrid_pmu(pmu) { + if (is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16) | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) { + char new_name[128]; + snprintf(new_name, sizeof(new_name), + "%s/%s/", pmu->name, name); + strlist__add(evt_name_list, new_name); } - continue; } - - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; } } } - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - - for (evt_i = 0; evt_i < evt_num; evt_i++) { - if (!evt_list[evt_i]) - break; - } - - evt_num = evt_i; - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { + strlist__for_each_entry(nd, evt_name_list) { if (name_only) { - printf("%s ", evt_list[evt_i++]); + printf("%s ", nd->s); continue; } - printf(" %-50s [%s]\n", evt_list[evt_i++], - event_type_descriptors[PERF_TYPE_HW_CACHE]); + printf(" %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]); } - if (evt_num && pager_in_use()) + if (!strlist__empty(evt_name_list) && pager_in_use()) printf("\n"); -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - - for (evt_i = 0; evt_i < npmus; evt_i++) - zfree(&evt_pmus[evt_i]); - zfree(&evt_pmus); - return evt_num; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", - event_type_descriptors[PERF_TYPE_HW_CACHE]); - if (evt_list) - goto out_free; - return evt_num; + strlist__delete(evt_name_list); + return 0; } static void print_tool_event(const struct event_symbol *syms, const char *event_glob, -- cgit v1.2.3 From de3752a7d6c4435087ce7cb8537bfa987b93ec8b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:20 -0800 Subject: perf list: Simplify symbol event printing The current code computes an array of symbol names then sorts and prints them. Use a strlist to create a list of names that is sorted and then print it. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 79 +++++++++++------------------------------- 1 file changed, 21 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index ff7793944246..d53dba033597 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -52,14 +52,6 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { }, }; -static int cmp_string(const void *a, const void *b) -{ - const char * const *as = a; - const char * const *bs = b; - - return strcmp(*as, *bs); -} - /* * Print the events from /tracing/events */ @@ -298,77 +290,48 @@ void print_symbol_events(const char *event_glob, unsigned int type, struct event_symbol *syms, unsigned int max, bool name_only) { - unsigned int i, evt_i = 0, evt_num = 0; - char name[MAX_NAME_LEN]; - char **evt_list = NULL; - bool evt_num_known = false; - -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; - syms -= max; - } + struct strlist *evt_name_list = strlist__new(NULL, NULL); + struct str_node *nd; - for (i = 0; i < max; i++, syms++) { + if (!evt_name_list) { + pr_debug("Failed to allocate new strlist for symbol events\n"); + return; + } + for (unsigned int i = 0; i < max; i++) { /* * New attr.config still not supported here, the latest * example was PERF_COUNT_SW_CGROUP_SWITCHES */ - if (syms->symbol == NULL) + if (syms[i].symbol == NULL) continue; - if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || - (syms->alias && strglobmatch(syms->alias, event_glob)))) + if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) || + (syms[i].alias && strglobmatch(syms[i].alias, event_glob)))) continue; if (!is_event_supported(type, i)) continue; - if (!evt_num_known) { - evt_num++; - continue; - } - - if (!name_only && strlen(syms->alias)) - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); - else - strlcpy(name, syms->symbol, MAX_NAME_LEN); + if (strlen(syms[i].alias)) { + char name[MAX_NAME_LEN]; - evt_list[evt_i] = strdup(name); - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias); + strlist__add(evt_name_list, name); + } else + strlist__add(evt_name_list, syms[i].symbol); } - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { + strlist__for_each_entry(nd, evt_name_list) { if (name_only) { - printf("%s ", evt_list[evt_i++]); + printf("%s ", nd->s); continue; } - printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + printf(" %-50s [%s]\n", nd->s, event_type_descriptors[type]); } - if (evt_num && pager_in_use()) + if (!strlist__empty(evt_name_list) && pager_in_use()) printf("\n"); -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); - if (evt_list) - goto out_free; + strlist__delete(evt_name_list); } /* -- cgit v1.2.3 From eb2d4514a5971444f67ac8b95bcd63e7702fa6bf Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:21 -0800 Subject: perf pmu: Restructure print_pmu_events() to avoid memory allocations Previously print_pmu_events() would compute the values to be printed, place them in struct sevent, sort them and then print them. Modify the code so that struct sevent holds just the PMU and event, sort these and then in the main print loop calculate aliases for names, etc. This avoids memory allocations for copied values as they are computed then printed. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 208 ++++++++++++++++++++++++++------------------------ 1 file changed, 110 insertions(+), 98 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e6790175307b..075c82dd1347 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1553,8 +1553,8 @@ static int sub_non_neg(int a, int b) return a - b; } -static char *format_alias(char *buf, int len, struct perf_pmu *pmu, - struct perf_pmu_alias *alias) +static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) { struct parse_events_term *term; int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); @@ -1579,51 +1579,67 @@ static char *format_alias(char *buf, int len, struct perf_pmu *pmu, return buf; } -static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, - struct perf_pmu_alias *alias) +static char *format_alias_or(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) { snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name); return buf; } +/** Struct for ordering events as output in perf list. */ struct sevent { - char *name; - char *desc; - char *topic; - char *str; - char *pmu; - char *metric_expr; - char *metric_name; - int is_cpu; + /** PMU for event. */ + const struct perf_pmu *pmu; + /** + * Optional event for name, desc, etc. If not present then this is a + * selectable PMU and the event name is shown as "//". + */ + const struct perf_pmu_alias *event; + /** Is the PMU for the CPU? */ + bool is_cpu; }; static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; + const char *a_pmu_name, *b_pmu_name; + const char *a_name = "//", *a_desc = NULL, *a_topic = ""; + const char *b_name = "//", *b_desc = NULL, *b_topic = ""; int ret; - /* Put extra events last */ - if (!!as->desc != !!bs->desc) - return !!as->desc - !!bs->desc; - if (as->topic && bs->topic) { - int n = strcmp(as->topic, bs->topic); - - if (n) - return n; + if (as->event) { + a_name = as->event->name; + a_desc = as->event->desc; + a_topic = as->event->topic ?: ""; } + if (bs->event) { + b_name = bs->event->name; + b_desc = bs->event->desc; + b_topic = bs->event->topic ?: ""; + } + /* Put extra events last. */ + if (!!a_desc != !!b_desc) + return !!a_desc - !!b_desc; + + /* Order by topics. */ + ret = strcmp(a_topic, b_topic); + if (ret) + return ret; /* Order CPU core events to be first */ if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - ret = strcmp(as->name, bs->name); - if (!ret) { - if (as->pmu && bs->pmu) - return strcmp(as->pmu, bs->pmu); - } + /* Order by PMU name. */ + a_pmu_name = as->pmu->name ?: ""; + b_pmu_name = bs->pmu->name ?: ""; + ret = strcmp(a_pmu_name, b_pmu_name); + if (ret) + return ret; - return ret; + /* Order by event name. */ + return strcmp(a_name, b_name); } static void wordwrap(char *s, int start, int max, int corr) @@ -1655,16 +1671,18 @@ bool is_pmu_core(const char *name) static bool pmu_alias_is_duplicate(struct sevent *alias_a, struct sevent *alias_b) { - /* Different names -> never duplicates */ - if (strcmp(alias_a->name, alias_b->name)) - return false; + const char *a_pmu_name, *b_pmu_name; + const char *a_name = alias_a->event ? alias_a->event->name : "//"; + const char *b_name = alias_b->event ? alias_b->event->name : "//"; - /* Don't remove duplicates for hybrid PMUs */ - if (perf_pmu__is_hybrid(alias_a->pmu) && - perf_pmu__is_hybrid(alias_b->pmu)) + /* Different names -> never duplicates */ + if (strcmp(a_name, b_name)) return false; - return true; + /* Don't remove duplicates for different PMUs */ + a_pmu_name = alias_a->pmu->name ?: ""; + b_pmu_name = alias_b->pmu->name ?: ""; + return strcmp(a_pmu_name, b_pmu_name) == 0; } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, @@ -1690,110 +1708,104 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, len++; } aliases = zalloc(sizeof(struct sevent) * len); - if (!aliases) - goto out_enomem; + if (!aliases) { + pr_err("FATAL: not enough memory to print PMU events\n"); + return; + } pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { + bool is_cpu; + if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) continue; - list_for_each_entry(alias, &pmu->aliases, list) { - char *name = alias->desc ? alias->name : - format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name) || - perf_pmu__is_hybrid(pmu->name); + is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); + list_for_each_entry(alias, &pmu->aliases, list) { if (alias->deprecated && !deprecated) continue; if (event_glob != NULL && - !(strglobmatch_nocase(name, event_glob) || - (!is_cpu && strglobmatch_nocase(alias->name, - event_glob)) || + !(strglobmatch_nocase(alias->name, event_glob) || + (!is_cpu && + strglobmatch_nocase(alias->name, event_glob)) || (alias->topic && strglobmatch_nocase(alias->topic, event_glob)))) continue; - if (is_cpu && !name_only && !alias->desc) - name = format_alias_or(buf, sizeof(buf), pmu, alias); - - aliases[j].name = name; - if (is_cpu && !name_only && !alias->desc) - aliases[j].name = format_alias_or(buf, - sizeof(buf), - pmu, alias); - aliases[j].name = strdup(aliases[j].name); - if (!aliases[j].name) - goto out_enomem; - - aliases[j].desc = long_desc ? alias->long_desc : - alias->desc; - aliases[j].topic = alias->topic; - aliases[j].str = alias->str; - aliases[j].pmu = pmu->name; - aliases[j].metric_expr = alias->metric_expr; - aliases[j].metric_name = alias->metric_name; + aliases[j].event = alias; + aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; j++; } if (pmu->selectable && (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { - char *s; - if (asprintf(&s, "%s//", pmu->name) < 0) - goto out_enomem; - aliases[j].name = s; + aliases[j].event = NULL; + aliases[j].pmu = pmu; + aliases[j].is_cpu = is_cpu; j++; } } len = j; qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { + char *name, *desc; + /* Skip duplicates */ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) continue; + if (!aliases[j].event) { + /* A selectable event. */ + snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name); + name = buf; + } else if (aliases[j].event->desc) { + name = aliases[j].event->name; + } else { + if (!name_only && aliases[j].is_cpu) { + name = format_alias_or(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + } else { + name = format_alias(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + } + } if (name_only) { - printf("%s ", aliases[j].name); + printf("%s ", name); continue; } - if (aliases[j].desc && !quiet_flag) { - if (numdesc++ == 0) - printf("\n"); - if (aliases[j].topic && (!topic || - strcmp(topic, aliases[j].topic))) { - printf("%s%s:\n", topic ? "\n" : "", - aliases[j].topic); - topic = aliases[j].topic; - } - printf(" %-50s\n", aliases[j].name); - printf("%*s", 8, "["); - wordwrap(aliases[j].desc, 8, columns, 0); - printf("]\n"); - if (details_flag) { - printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); - if (aliases[j].metric_name) - printf(" MetricName: %s", aliases[j].metric_name); - if (aliases[j].metric_expr) - printf(" MetricExpr: %s", aliases[j].metric_expr); - putchar('\n'); - } - } else - printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; + if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) { + printf(" %-50s [Kernel PMU event]\n", name); + continue; + } + if (numdesc++ == 0) + printf("\n"); + if (aliases[j].event->topic && (!topic || + strcmp(topic, aliases[j].event->topic))) { + printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic); + topic = aliases[j].event->topic; + } + printf(" %-50s\n", name); + printf("%*s", 8, "["); + desc = long_desc ? aliases[j].event->long_desc : aliases[j].event->desc; + wordwrap(desc, 8, columns, 0); + printf("]\n"); + if (details_flag) { + printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str); + if (aliases[j].event->metric_name) + printf(" MetricName: %s", aliases[j].event->metric_name); + if (aliases[j].event->metric_expr) + printf(" MetricExpr: %s", aliases[j].event->metric_expr); + putchar('\n'); + } } if (printed && pager_in_use()) printf("\n"); -out_free: - for (j = 0; j < len; j++) - zfree(&aliases[j].name); + zfree(&aliases); return; - -out_enomem: - printf("FATAL: not enough memory to print PMU events\n"); - if (aliases) - goto out_free; } bool pmu_have_event(const char *pname, const char *name) -- cgit v1.2.3 From f5bc4428cc6e096170d35fc79a640d90e33dd9c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:09 -0800 Subject: perf stat: Clear screen only if output file is a tty The --interval-clear option makes perf stat to clear the terminal at each interval. But it doesn't need to clear the screen when it saves to a file. Make it fail when it's enabled with the output options. $ perf stat -I 1 --interval-clear -o myfile true --interval-clear does not work with output Usage: perf stat [] [] -o, --output output file name --log-fd log output to fd, instead of stderr --interval-clear clear screen in between new interval Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d5e1670bca20..1d79801f4e84 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2403,6 +2403,14 @@ int cmd_stat(int argc, const char **argv) } } + if (stat_config.interval_clear && !isatty(fileno(output))) { + fprintf(stderr, "--interval-clear does not work with output\n"); + parse_options_usage(stat_usage, stat_options, "o", 1); + parse_options_usage(NULL, stat_options, "log-fd", 0); + parse_options_usage(NULL, stat_options, "interval-clear", 0); + return -1; + } + stat_config.output = output; /* -- cgit v1.2.3 From 31bf6aea997674a030be02999560d8e6f7a6b974 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:10 -0800 Subject: perf stat: Split print_running() function To make the code more obvious and hopefully simpler, factor out the code for each output mode - stdio, CSV, JSON. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 2a3c1e0098b9..281b811f8574 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -25,24 +25,41 @@ #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" -static void print_running(struct perf_stat_config *config, - u64 run, u64 ena) +static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) +{ + if (run != ena) + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); +} + +static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena) { + double enabled_percent = 100; + + if (run != ena) + enabled_percent = 100 * run / ena; + fprintf(config->output, "%s%" PRIu64 "%s%.2f", + config->csv_sep, run, config->csv_sep, enabled_percent); +} +static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena) +{ double enabled_percent = 100; if (run != ena) enabled_percent = 100 * run / ena; + fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", + run, enabled_percent); +} + +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena) +{ if (config->json_output) - fprintf(config->output, - "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", - run, enabled_percent); + print_running_json(config, run, ena); else if (config->csv_output) - fprintf(config->output, - "%s%" PRIu64 "%s%.2f", config->csv_sep, - run, config->csv_sep, enabled_percent); - else if (run != ena) - fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); + print_running_csv(config, run, ena); + else + print_running_std(config, run, ena); } static void print_noise_pct(struct perf_stat_config *config, -- cgit v1.2.3 From def99d60df6f21b4c36d23f0071ec8a73f39f28b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:11 -0800 Subject: perf stat: Split print_noise_pct() function Likewise, split print_noise_pct() for each output mode. Although it's a tiny function, more logic will be added soon so it'd be better split it and treat it in the same way. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 281b811f8574..a230f65efa62 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -62,17 +62,36 @@ static void print_running(struct perf_stat_config *config, print_running_std(config, run, ena); } +static void print_noise_pct_std(struct perf_stat_config *config, + double pct) +{ + if (pct) + fprintf(config->output, " ( +-%6.2f%% )", pct); +} + +static void print_noise_pct_csv(struct perf_stat_config *config, + double pct) +{ + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); +} + +static void print_noise_pct_json(struct perf_stat_config *config, + double pct) +{ + fprintf(config->output, "\"variance\" : %.2f, ", pct); +} + static void print_noise_pct(struct perf_stat_config *config, double total, double avg) { double pct = rel_stddev_stats(total, avg); if (config->json_output) - fprintf(config->output, "\"variance\" : %.2f, ", pct); + print_noise_pct_json(config, pct); else if (config->csv_output) - fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); - else if (pct) - fprintf(config->output, " ( +-%6.2f%% )", pct); + print_noise_pct_csv(config, pct); + else + print_noise_pct_std(config, pct); } static void print_noise(struct perf_stat_config *config, -- cgit v1.2.3 From 41cb875242e71bf1bf5539a724bc65a0b470bb12 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:12 -0800 Subject: perf stat: Split print_cgroup() function Likewise, split print_cgroup() for each output mode. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a230f65efa62..af2a561eb20c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -106,15 +106,32 @@ static void print_noise(struct perf_stat_config *config, print_noise_pct(config, stddev_stats(&ps->res_stats), avg); } +static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, " %s", cgrp_name); +} + +static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); +} + +static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); +} + static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) { if (nr_cgroups) { const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name : ""; if (config->json_output) - fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); + print_cgroup_json(config, cgrp_name); + if (config->csv_output) + print_cgroup_csv(config, cgrp_name); else - fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); + print_cgroup_std(config, cgrp_name); } } -- cgit v1.2.3 From 33b2e2c2ad68489bc0c97660ef1e6bf7df0c9d7d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:13 -0800 Subject: perf stat: Split aggr_printout() function The aggr_printout() function is to print aggr_id and count (nr). Split it for each output mode to simplify the code. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 220 ++++++++++++++++++++++------------------- 1 file changed, 121 insertions(+), 99 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index af2a561eb20c..ed421f6d512f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -135,124 +135,135 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) } } - -static void aggr_printout(struct perf_stat_config *config, - struct evsel *evsel, struct aggr_cpu_id id, int nr) +static void print_aggr_id_std(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) { + FILE *output = config->output; + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(output, "S%d-D%d-C%*d %*d ", + id.socket, id.die, -8, id.core, 4, nr); + break; + case AGGR_DIE: + fprintf(output, "S%d-D%*d %*d ", + id.socket, -8, id.die, 4, nr); + break; + case AGGR_SOCKET: + fprintf(output, "S%*d %*d ", + -5, id.socket, 4, nr); + break; + case AGGR_NODE: + fprintf(output, "N%*d %*d ", + -5, id.node, 4, nr); + break; + case AGGR_NONE: + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "S%d-D%d-C%*d ", + id.socket, id.die, -3, id.core); + } else if (id.cpu.cpu > -1) { + fprintf(output, "CPU%*d ", + -7, id.cpu.cpu); + } + break; + case AGGR_THREAD: + fprintf(output, "%*s-%*d ", + 16, perf_thread_map__comm(evsel->core.threads, id.thread_idx), + -8, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + case AGGR_MAX: + default: + break; + } +} - if (config->json_output && !config->interval) - fprintf(config->output, "{"); +static void print_aggr_id_csv(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + FILE *output = config->output; + const char *sep = config->csv_sep; switch (config->aggr_mode) { case AGGR_CORE: - if (config->json_output) { - fprintf(config->output, - "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", - id.socket, - id.die, - id.core, - nr); - } else { - fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - id.socket, - id.die, - config->csv_output ? 0 : -8, - id.core, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d-D%d-C%d%s%d%s", + id.socket, id.die, id.core, sep, nr, sep); break; case AGGR_DIE: - if (config->json_output) { - fprintf(config->output, - "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", - id.socket, - id.die, - nr); - } else { - fprintf(config->output, "S%d-D%*d%s%*d%s", - id.socket, - config->csv_output ? 0 : -8, - id.die, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d-D%d%s%d%s", + id.socket, id.die, sep, nr, sep); break; case AGGR_SOCKET: - if (config->json_output) { - fprintf(config->output, - "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", - id.socket, - nr); - } else { - fprintf(config->output, "S%*d%s%*d%s", - config->csv_output ? 0 : -5, - id.socket, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d%s%d%s", + id.socket, sep, nr, sep); break; case AGGR_NODE: - if (config->json_output) { - fprintf(config->output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", - id.node, - nr); - } else { - fprintf(config->output, "N%*d%s%*d%s", - config->csv_output ? 0 : -5, - id.node, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "N%d%s%d%s", + id.node, sep, nr, sep); break; case AGGR_NONE: - if (config->json_output) { - if (evsel->percore && !config->percore_show_thread) { - fprintf(config->output, "\"core\" : \"S%d-D%d-C%d\"", - id.socket, - id.die, - id.core); - } else if (id.cpu.cpu > -1) { - fprintf(config->output, "\"cpu\" : \"%d\", ", - id.cpu.cpu); - } - } else { - if (evsel->percore && !config->percore_show_thread) { - fprintf(config->output, "S%d-D%d-C%*d%s", - id.socket, - id.die, - config->csv_output ? 0 : -3, - id.core, config->csv_sep); - } else if (id.cpu.cpu > -1) { - fprintf(config->output, "CPU%*d%s", - config->csv_output ? 0 : -7, - id.cpu.cpu, config->csv_sep); - } + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "S%d-D%d-C%d%s", + id.socket, id.die, id.core, sep); + } else if (id.cpu.cpu > -1) { + fprintf(output, "CPU%d%s", + id.cpu.cpu, sep); } break; case AGGR_THREAD: - if (config->json_output) { - fprintf(config->output, "\"thread\" : \"%s-%d\", ", - perf_thread_map__comm(evsel->core.threads, id.thread_idx), - perf_thread_map__pid(evsel->core.threads, id.thread_idx)); - } else { - fprintf(config->output, "%*s-%*d%s", - config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id.thread_idx), - config->csv_output ? 0 : -8, - perf_thread_map__pid(evsel->core.threads, id.thread_idx), - config->csv_sep); + fprintf(output, "%s-%d%s", + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + perf_thread_map__pid(evsel->core.threads, id.thread_idx), + sep); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + case AGGR_MAX: + default: + break; + } +} + +static void print_aggr_id_json(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + FILE *output = config->output; + + if (!config->interval) + fputc('{', output); + + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, id.core, nr); + break; + case AGGR_DIE: + fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, nr); + break; + case AGGR_SOCKET: + fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + id.socket, nr); + break; + case AGGR_NODE: + fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + id.node, nr); + break; + case AGGR_NONE: + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "\"core\" : \"S%d-D%d-C%d\"", + id.socket, id.die, id.core); + } else if (id.cpu.cpu > -1) { + fprintf(output, "\"cpu\" : \"%d\", ", + id.cpu.cpu); } break; + case AGGR_THREAD: + fprintf(output, "\"thread\" : \"%s-%d\", ", + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + break; case AGGR_GLOBAL: case AGGR_UNSET: case AGGR_MAX: @@ -261,6 +272,17 @@ static void aggr_printout(struct perf_stat_config *config, } } +static void aggr_printout(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + if (config->json_output) + print_aggr_id_json(config, evsel, id, nr); + else if (config->csv_output) + print_aggr_id_csv(config, evsel, id, nr); + else + print_aggr_id_std(config, evsel, id, nr); +} + struct outstate { FILE *fh; bool newline; -- cgit v1.2.3 From c2019f844eacd06decc435bdcee9eab1e1a6f4ec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:14 -0800 Subject: perf stat: Factor out print_counter_value() function And split it for each output mode like others. I believe it makes the code simpler and more intuitive. Now abs_printout() becomes just to call sub-functions. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 81 +++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ed421f6d512f..a72c7442ff3d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -517,46 +517,71 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static void abs_printout(struct perf_stat_config *config, - struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) +static void print_counter_value_std(struct perf_stat_config *config, + struct evsel *evsel, double avg) { FILE *output = config->output; double sc = evsel->scale; const char *fmt; - if (config->csv_output) { - fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; - } else { - if (config->big_num) - fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; - else - fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; - } + if (config->big_num) + fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f "; + else + fmt = floor(sc) != sc ? "%18.2f " : "%18.0f "; - aggr_printout(config, evsel, id, nr); + fprintf(output, fmt, avg); - if (config->json_output) - fprintf(output, "\"counter-value\" : \"%f\", ", avg); - else - fprintf(output, fmt, avg, config->csv_sep); + if (evsel->unit) + fprintf(output, "%-*s ", config->unit_width, evsel->unit); - if (config->json_output) { - if (evsel->unit) { - fprintf(output, "\"unit\" : \"%s\", ", - evsel->unit); - } - } else { - if (evsel->unit) - fprintf(output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - evsel->unit, config->csv_sep); - } + fprintf(output, "%-*s", 32, evsel__name(evsel)); +} +static void print_counter_value_csv(struct perf_stat_config *config, + struct evsel *evsel, double avg) +{ + FILE *output = config->output; + double sc = evsel->scale; + const char *sep = config->csv_sep; + const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + + fprintf(output, fmt, avg, sep); + + if (evsel->unit) + fprintf(output, "%s%s", evsel->unit, sep); + + fprintf(output, "%s", evsel__name(evsel)); +} + +static void print_counter_value_json(struct perf_stat_config *config, + struct evsel *evsel, double avg) +{ + FILE *output = config->output; + + fprintf(output, "\"counter-value\" : \"%f\", ", avg); + + if (evsel->unit) + fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); + + fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); +} + +static void print_counter_value(struct perf_stat_config *config, + struct evsel *evsel, double avg) +{ if (config->json_output) - fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); + print_counter_value_json(config, evsel, avg); + else if (config->csv_output) + print_counter_value_csv(config, evsel, avg); else - fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel)); + print_counter_value_std(config, evsel, avg); +} +static void abs_printout(struct perf_stat_config *config, + struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) +{ + aggr_printout(config, evsel, id, nr); + print_counter_value(config, evsel, avg); print_cgroup(config, evsel); } -- cgit v1.2.3 From d6aeb861b1fb26f55c1b5be4b6e900ad1e734516 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:15 -0800 Subject: perf stat: Handle bad events in abs_printout() In the printout() function, it checks if the event is bad (i.e. not counted or not supported) and print the result. But it does the same what abs_printout() is doing. So add an argument to indicate the value is ok or not and use the same function in both cases. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 68 +++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a72c7442ff3d..fe5483893289 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -518,18 +518,22 @@ static void print_metric_header(struct perf_stat_config *config, } static void print_counter_value_std(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; double sc = evsel->scale; const char *fmt; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; if (config->big_num) fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f "; else fmt = floor(sc) != sc ? "%18.2f " : "%18.0f "; - fprintf(output, fmt, avg); + if (ok) + fprintf(output, fmt, avg); + else + fprintf(output, "%18s ", bad_count); if (evsel->unit) fprintf(output, "%-*s ", config->unit_width, evsel->unit); @@ -538,14 +542,18 @@ static void print_counter_value_std(struct perf_stat_config *config, } static void print_counter_value_csv(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; double sc = evsel->scale; const char *sep = config->csv_sep; const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; - fprintf(output, fmt, avg, sep); + if (ok) + fprintf(output, fmt, avg, sep); + else + fprintf(output, "%s%s", bad_count, sep); if (evsel->unit) fprintf(output, "%s%s", evsel->unit, sep); @@ -554,11 +562,15 @@ static void print_counter_value_csv(struct perf_stat_config *config, } static void print_counter_value_json(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; - fprintf(output, "\"counter-value\" : \"%f\", ", avg); + if (ok) + fprintf(output, "\"counter-value\" : \"%f\", ", avg); + else + fprintf(output, "\"counter-value\" : \"%s\", ", bad_count); if (evsel->unit) fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); @@ -567,21 +579,22 @@ static void print_counter_value_json(struct perf_stat_config *config, } static void print_counter_value(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool ok) { if (config->json_output) - print_counter_value_json(config, evsel, avg); + print_counter_value_json(config, evsel, avg, ok); else if (config->csv_output) - print_counter_value_csv(config, evsel, avg); + print_counter_value_csv(config, evsel, avg, ok); else - print_counter_value_std(config, evsel, avg); + print_counter_value_std(config, evsel, avg, ok); } static void abs_printout(struct perf_stat_config *config, - struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) + struct aggr_cpu_id id, int nr, + struct evsel *evsel, double avg, bool ok) { aggr_printout(config, evsel, id, nr); - print_counter_value(config, evsel, avg); + print_counter_value(config, evsel, avg, ok); print_cgroup(config, evsel); } @@ -658,17 +671,8 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm(config, &os, NULL, "", "", 0); return; } - aggr_printout(config, counter, id, nr); - if (config->json_output) { - fprintf(config->output, "\"counter-value\" : \"%s\", ", - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED); - } else { - fprintf(config->output, "%*s%s", - config->csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - config->csv_sep); - } + abs_printout(config, id, nr, counter, uval, /*ok=*/false); if (counter->supported) { if (!evlist__has_hybrid(counter->evlist)) { @@ -678,24 +682,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } } - if (config->json_output) { - fprintf(config->output, "\"unit\" : \"%s\", ", counter->unit); - } else { - fprintf(config->output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - counter->unit, config->csv_sep); - } - - if (config->json_output) { - fprintf(config->output, "\"event\" : \"%s\", ", - evsel__name(counter)); - } else { - fprintf(config->output, "%*s", - config->csv_output ? 0 : -25, evsel__name(counter)); - } - - print_cgroup(config, counter); - if (!config->csv_output && !config->json_output) pm(config, &os, NULL, NULL, "", 0); print_noise(config, counter, noise); @@ -706,7 +692,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } if (!config->metric_only) - abs_printout(config, id, nr, counter, uval); + abs_printout(config, id, nr, counter, uval, /*ok=*/true); out.print_metric = pm; out.new_line = nl; -- cgit v1.2.3 From df46a3c92b510b7776fed14728a271f8836ef19b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:16 -0800 Subject: perf stat: Add before_metric argument Unfortunately, event running time, percentage and noise data are printed in different positions in normal output than CSV/JSON. I think it's better to put such details in where it actually prints. So add before_metric argument to print_noise() and print_running() and call them twice before and after the metric. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 82 +++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index fe5483893289..bf3f2f9d5dee 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -52,14 +52,18 @@ static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena } static void print_running(struct perf_stat_config *config, - u64 run, u64 ena) + u64 run, u64 ena, bool before_metric) { - if (config->json_output) - print_running_json(config, run, ena); - else if (config->csv_output) - print_running_csv(config, run, ena); - else - print_running_std(config, run, ena); + if (config->json_output) { + if (before_metric) + print_running_json(config, run, ena); + } else if (config->csv_output) { + if (before_metric) + print_running_csv(config, run, ena); + } else { + if (!before_metric) + print_running_std(config, run, ena); + } } static void print_noise_pct_std(struct perf_stat_config *config, @@ -82,20 +86,24 @@ static void print_noise_pct_json(struct perf_stat_config *config, } static void print_noise_pct(struct perf_stat_config *config, - double total, double avg) + double total, double avg, bool before_metric) { double pct = rel_stddev_stats(total, avg); - if (config->json_output) - print_noise_pct_json(config, pct); - else if (config->csv_output) - print_noise_pct_csv(config, pct); - else - print_noise_pct_std(config, pct); + if (config->json_output) { + if (before_metric) + print_noise_pct_json(config, pct); + } else if (config->csv_output) { + if (before_metric) + print_noise_pct_csv(config, pct); + } else { + if (!before_metric) + print_noise_pct_std(config, pct); + } } static void print_noise(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool before_metric) { struct perf_stat_evsel *ps; @@ -103,7 +111,7 @@ static void print_noise(struct perf_stat_config *config, return; ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats), avg); + print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric); } static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) @@ -637,6 +645,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int }; print_metric_t pm; new_line_t nl; + bool ok = true; if (config->csv_output) { static const int aggr_fields[AGGR_MAX] = { @@ -672,7 +681,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int return; } - abs_printout(config, id, nr, counter, uval, /*ok=*/false); + ok = false; if (counter->supported) { if (!evlist__has_hybrid(counter->evlist)) { @@ -681,37 +690,30 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int config->print_mixed_hw_group_error = 1; } } - - if (!config->csv_output && !config->json_output) - pm(config, &os, NULL, NULL, "", 0); - print_noise(config, counter, noise); - print_running(config, run, ena); - if (config->csv_output || config->json_output) - pm(config, &os, NULL, NULL, "", 0); - return; } - if (!config->metric_only) - abs_printout(config, id, nr, counter, uval, /*ok=*/true); - out.print_metric = pm; out.new_line = nl; out.ctx = &os; out.force_header = false; - if (config->csv_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); - } else if (config->json_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); + if (!config->metric_only) { + abs_printout(config, id, nr, counter, uval, ok); + + print_noise(config, counter, noise, /*before_metric=*/true); + print_running(config, run, ena, /*before_metric=*/true); + } + + if (ok) { + perf_stat__print_shadow_stats(config, counter, uval, map_idx, + &out, &config->metric_events, st); + } else { + pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); } - perf_stat__print_shadow_stats(config, counter, uval, map_idx, - &out, &config->metric_events, st); - if (!config->csv_output && !config->metric_only && !config->json_output) { - print_noise(config, counter, noise); - print_running(config, run, ena); + if (!config->metric_only) { + print_noise(config, counter, noise, /*before_metric=*/false); + print_running(config, run, ena, /*before_metric=*/false); } } @@ -1151,7 +1153,7 @@ static void print_footer(struct perf_stat_config *config) fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); - print_noise_pct(config, sd, avg); + print_noise_pct(config, sd, avg, /*before_metric=*/false); } fprintf(output, "\n\n"); -- cgit v1.2.3 From 8d500292bd55c05130c96f6e84cc3e6ba3ebed99 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:17 -0800 Subject: perf stat: Align cgroup names We don't know how long cgroup name is, but at least we can align short ones like below. $ perf stat -a --for-each-cgroup system.slice,user.slice true Performance counter stats for 'system wide': 0.13 msec cpu-clock system.slice # 0.010 CPUs utilized 4 context-switches system.slice # 31.989 K/sec 1 cpu-migrations system.slice # 7.997 K/sec 0 page-faults system.slice # 0.000 /sec 450,673 cycles system.slice # 3.604 GHz (92.41%) 161,216 instructions system.slice # 0.36 insn per cycle (92.41%) 32,678 branches system.slice # 261.332 M/sec (92.41%) 2,628 branch-misses system.slice # 8.04% of all branches (92.41%) 14.29 msec cpu-clock user.slice # 1.163 CPUs utilized 35 context-switches user.slice # 2.449 K/sec 12 cpu-migrations user.slice # 839.691 /sec 57 page-faults user.slice # 3.989 K/sec 49,683,026 cycles user.slice # 3.477 GHz (99.38%) 110,790,266 instructions user.slice # 2.23 insn per cycle (99.38%) 24,552,255 branches user.slice # 1.718 G/sec (99.38%) 127,779 branch-misses user.slice # 0.52% of all branches (99.38%) 0.012289431 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bf3f2f9d5dee..e66f766a3d78 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -116,7 +116,7 @@ static void print_noise(struct perf_stat_config *config, static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) { - fprintf(config->output, " %s", cgrp_name); + fprintf(config->output, " %-16s", cgrp_name); } static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) -- cgit v1.2.3 From b2d9832e00a0d93ad6127fa8ccf2b8e0cfe67397 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:18 -0800 Subject: perf stat: Split print_metric_headers() function The print_metric_headers() shows metric headers a little bit for each mode. Split it out to make the code clearer. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 52 ++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index e66f766a3d78..bb2791459f5f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -924,6 +924,37 @@ static const char *aggr_header_csv[] = { [AGGR_GLOBAL] = "" }; +static void print_metric_headers_std(struct perf_stat_config *config, + const char *prefix, bool no_indent) +{ + if (prefix) + fprintf(config->output, "%s", prefix); + if (!no_indent) { + fprintf(config->output, "%*s", + aggr_header_lens[config->aggr_mode], ""); + } +} + +static void print_metric_headers_csv(struct perf_stat_config *config, + const char *prefix, + bool no_indent __maybe_unused) +{ + if (prefix) + fprintf(config->output, "%s", prefix); + if (config->interval) + fputs("time,", config->output); + if (!config->iostat_run) + fputs(aggr_header_csv[config->aggr_mode], config->output); +} + +static void print_metric_headers_json(struct perf_stat_config *config, + const char *prefix __maybe_unused, + bool no_indent __maybe_unused) +{ + if (config->interval) + fputs("{\"unit\" : \"sec\"}", config->output); +} + static void print_metric_headers(struct perf_stat_config *config, struct evlist *evlist, const char *prefix, bool no_indent) @@ -939,22 +970,13 @@ static void print_metric_headers(struct perf_stat_config *config, .force_header = true, }; - if (prefix && !config->json_output) - fprintf(config->output, "%s", prefix); + if (config->json_output) + print_metric_headers_json(config, prefix, no_indent); + else if (config->csv_output) + print_metric_headers_csv(config, prefix, no_indent); + else + print_metric_headers_std(config, prefix, no_indent); - if (!config->csv_output && !config->json_output && !no_indent) - fprintf(config->output, "%*s", - aggr_header_lens[config->aggr_mode], ""); - if (config->csv_output) { - if (config->interval) - fputs("time,", config->output); - if (!config->iostat_run) - fputs(aggr_header_csv[config->aggr_mode], config->output); - } - if (config->json_output) { - if (config->interval) - fputs("{\"unit\" : \"sec\"}", config->output); - } if (config->iostat_run) iostat_print_header_prefix(config); -- cgit v1.2.3 From 208cbcd21bf57c80ceaf90fcec16cfdfb55a0a4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:19 -0800 Subject: perf stat: Factor out prepare_interval() This logic does not print the time directly, but it just puts the timestamp in the buffer as a prefix. To reduce the confusion, factor out the code into a separate function. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bb2791459f5f..c234be656db9 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -993,9 +993,25 @@ static void print_metric_headers(struct perf_stat_config *config, fputc('\n', config->output); } +static void prepare_interval(struct perf_stat_config *config, + char *prefix, struct timespec *ts) +{ + if (config->iostat_run) + return; + + if (!config->json_output) + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, + ts->tv_nsec, config->csv_sep); + else if (!config->metric_only) + sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) + ts->tv_sec, ts->tv_nsec); + else + sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) + ts->tv_sec, ts->tv_nsec); +} + static void print_interval(struct perf_stat_config *config, - struct evlist *evlist, - char *prefix, struct timespec *ts) + struct evlist *evlist) { bool metric_only = config->metric_only; unsigned int unit_width = config->unit_width; @@ -1005,16 +1021,6 @@ static void print_interval(struct perf_stat_config *config, if (config->interval_clear && isatty(fileno(output))) puts(CONSOLE_CLEAR); - if (!config->iostat_run && !config->json_output) - sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, - ts->tv_nsec, config->csv_sep); - if (!config->iostat_run && config->json_output && !config->metric_only) - sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) - ts->tv_sec, ts->tv_nsec); - if (!config->iostat_run && config->json_output && config->metric_only) - sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) - ts->tv_sec, ts->tv_nsec); - if ((num_print_interval == 0 || config->interval_clear) && !config->csv_output && !config->json_output) { switch (config->aggr_mode) { @@ -1252,10 +1258,13 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (config->iostat_run) evlist->selected = evlist__first(evlist); - if (interval) - print_interval(config, evlist, prefix = buf, ts); - else + if (interval) { + prefix = buf; + prepare_interval(config, prefix, ts); + print_interval(config, evlist); + } else { print_header(config, _target, argc, argv); + } if (metric_only) { static int num_print_iv; -- cgit v1.2.3 From 33c4ed47990f3eaaa732d3fcc2760e6165c16c40 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:20 -0800 Subject: perf stat: Cleanup interval print alignment Instead of using magic values, define symbolic constants and use them. Also add aggr_header_std[] array to simplify aggr_mode handling. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 165 +++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 74 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index c234be656db9..f983432aaddd 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -25,6 +25,45 @@ #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" +#define METRIC_LEN 38 +#define EVNAME_LEN 32 +#define COUNTS_LEN 18 +#define INTERVAL_LEN 16 +#define CGROUP_LEN 16 +#define COMM_LEN 16 +#define PID_LEN 7 +#define CPUS_LEN 4 + +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_DIE] = 12, + [AGGR_SOCKET] = 6, + [AGGR_NODE] = 6, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 16, + [AGGR_GLOBAL] = 0, +}; + +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_DIE] = "die,cpus,", + [AGGR_SOCKET] = "socket,cpus,", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", + [AGGR_GLOBAL] = "" +}; + +static const char *aggr_header_std[] = { + [AGGR_CORE] = "core", + [AGGR_DIE] = "die", + [AGGR_SOCKET] = "socket", + [AGGR_NONE] = "cpu", + [AGGR_THREAD] = "comm-pid", + [AGGR_NODE] = "node", + [AGGR_GLOBAL] = "" +}; + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -116,7 +155,7 @@ static void print_noise(struct perf_stat_config *config, static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) { - fprintf(config->output, " %-16s", cgrp_name); + fprintf(config->output, " %-*s", CGROUP_LEN, cgrp_name); } static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) @@ -147,44 +186,46 @@ static void print_aggr_id_std(struct perf_stat_config *config, struct evsel *evsel, struct aggr_cpu_id id, int nr) { FILE *output = config->output; + int idx = config->aggr_mode; + char buf[128]; switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "S%d-D%d-C%*d %*d ", - id.socket, id.die, -8, id.core, 4, nr); + snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core); break; case AGGR_DIE: - fprintf(output, "S%d-D%*d %*d ", - id.socket, -8, id.die, 4, nr); + snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die); break; case AGGR_SOCKET: - fprintf(output, "S%*d %*d ", - -5, id.socket, 4, nr); + snprintf(buf, sizeof(buf), "S%d", id.socket); break; case AGGR_NODE: - fprintf(output, "N%*d %*d ", - -5, id.node, 4, nr); + snprintf(buf, sizeof(buf), "N%d", id.node); break; case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { - fprintf(output, "S%d-D%d-C%*d ", - id.socket, id.die, -3, id.core); + snprintf(buf, sizeof(buf), "S%d-D%d-C%d ", + id.socket, id.die, id.core); + fprintf(output, "%-*s ", + aggr_header_lens[AGGR_CORE], buf); } else if (id.cpu.cpu > -1) { - fprintf(output, "CPU%*d ", - -7, id.cpu.cpu); + fprintf(output, "CPU%-*d ", + aggr_header_lens[AGGR_NONE] - 3, id.cpu.cpu); } - break; + return; case AGGR_THREAD: - fprintf(output, "%*s-%*d ", - 16, perf_thread_map__comm(evsel->core.threads, id.thread_idx), - -8, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); - break; + fprintf(output, "%*s-%-*d ", + COMM_LEN, perf_thread_map__comm(evsel->core.threads, id.thread_idx), + PID_LEN, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + return; case AGGR_GLOBAL: case AGGR_UNSET: case AGGR_MAX: default: - break; + return; } + + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, nr); } static void print_aggr_id_csv(struct perf_stat_config *config, @@ -301,8 +342,6 @@ struct outstate { struct evsel *evsel; }; -#define METRIC_LEN 38 - static void new_line_std(struct perf_stat_config *config __maybe_unused, void *ctx) { @@ -534,19 +573,19 @@ static void print_counter_value_std(struct perf_stat_config *config, const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; if (config->big_num) - fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f "; + fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f "; else - fmt = floor(sc) != sc ? "%18.2f " : "%18.0f "; + fmt = floor(sc) != sc ? "%*.2f " : "%*.0f "; if (ok) - fprintf(output, fmt, avg); + fprintf(output, fmt, COUNTS_LEN, avg); else - fprintf(output, "%18s ", bad_count); + fprintf(output, "%*s ", COUNTS_LEN, bad_count); if (evsel->unit) fprintf(output, "%-*s ", config->unit_width, evsel->unit); - fprintf(output, "%-*s", 32, evsel__name(evsel)); + fprintf(output, "%-*s", EVNAME_LEN, evsel__name(evsel)); } static void print_counter_value_csv(struct perf_stat_config *config, @@ -904,34 +943,19 @@ static void print_no_aggr_metric(struct perf_stat_config *config, } } -static int aggr_header_lens[] = { - [AGGR_CORE] = 24, - [AGGR_DIE] = 18, - [AGGR_SOCKET] = 12, - [AGGR_NONE] = 6, - [AGGR_THREAD] = 24, - [AGGR_NODE] = 6, - [AGGR_GLOBAL] = 0, -}; - -static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_DIE] = "die,cpus,", - [AGGR_SOCKET] = "socket,cpus,", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_NODE] = "node,", - [AGGR_GLOBAL] = "" -}; - static void print_metric_headers_std(struct perf_stat_config *config, const char *prefix, bool no_indent) { if (prefix) fprintf(config->output, "%s", prefix); + if (!no_indent) { - fprintf(config->output, "%*s", - aggr_header_lens[config->aggr_mode], ""); + int len = aggr_header_lens[config->aggr_mode]; + + if (nr_cgroups) + len += CGROUP_LEN + 1; + + fprintf(config->output, "%*s", len, ""); } } @@ -1025,46 +1049,39 @@ static void print_interval(struct perf_stat_config *config, !config->csv_output && !config->json_output) { switch (config->aggr_mode) { case AGGR_NODE: - fprintf(output, "# time node cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; case AGGR_SOCKET: - fprintf(output, "# time socket cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; case AGGR_DIE: - fprintf(output, "# time die cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; case AGGR_CORE: - fprintf(output, "# time core cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + fprintf(output, "#%*s %-*s cpus", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); break; case AGGR_NONE: - fprintf(output, "# time CPU "); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + fprintf(output, "#%*s %-*s", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); break; case AGGR_THREAD: - fprintf(output, "# time comm-pid"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + fprintf(output, "#%*s %*s-%-*s", + INTERVAL_LEN - 1, "time", + COMM_LEN, "comm", PID_LEN, "pid"); break; case AGGR_GLOBAL: default: - if (!config->iostat_run) { - fprintf(output, "# time"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - } + if (!config->iostat_run) + fprintf(output, "#%*s", + INTERVAL_LEN - 1, "time"); case AGGR_UNSET: case AGGR_MAX: break; } + + if (!metric_only) { + fprintf(output, " %*s %*s events\n", + COUNTS_LEN, "counts", unit_width, "unit"); + } } if ((num_print_interval == 0 || config->interval_clear) && metric_only) -- cgit v1.2.3 From 6108712c07422f8ececb163ace7fad2ae0e9a24c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:21 -0800 Subject: perf stat: Remove impossible condition The print would run only if metric_only is not set, but it's already in a block that says it's in metric_only case. And there's no place to change the setting. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-14-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f983432aaddd..cc8bb6d07dcb 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1292,9 +1292,6 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf num_print_iv = 0; if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) fprintf(config->output, "%s", prefix); - - if (config->json_output && !config->metric_only) - fprintf(config->output, "}"); } switch (config->aggr_mode) { -- cgit v1.2.3 From 4c86b664f4cce6940c187ecfc019f434f8dbd4bc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:22 -0800 Subject: perf stat: Rework header display There are print_header() and print_interval() to print header lines before actual counter values. Also print_metric_headers() needs to be called for the metric-only case. Let's move all these logics to a single place including num_print_iv to refresh the headers for interval mode. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-15-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 185 +++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 79 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cc8bb6d07dcb..f97817628478 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1034,94 +1034,129 @@ static void prepare_interval(struct perf_stat_config *config, ts->tv_sec, ts->tv_nsec); } -static void print_interval(struct perf_stat_config *config, - struct evlist *evlist) +static void print_header_interval_std(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) { - bool metric_only = config->metric_only; - unsigned int unit_width = config->unit_width; FILE *output = config->output; - static int num_print_interval; - if (config->interval_clear && isatty(fileno(output))) - puts(CONSOLE_CLEAR); + switch (config->aggr_mode) { + case AGGR_NODE: + case AGGR_SOCKET: + case AGGR_DIE: + case AGGR_CORE: + fprintf(output, "#%*s %-*s cpus", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); + break; + case AGGR_NONE: + fprintf(output, "#%*s %-*s", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); + break; + case AGGR_THREAD: + fprintf(output, "#%*s %*s-%-*s", + INTERVAL_LEN - 1, "time", + COMM_LEN, "comm", PID_LEN, "pid"); + break; + case AGGR_GLOBAL: + default: + if (!config->iostat_run) + fprintf(output, "#%*s", + INTERVAL_LEN - 1, "time"); + case AGGR_UNSET: + case AGGR_MAX: + break; + } - if ((num_print_interval == 0 || config->interval_clear) && - !config->csv_output && !config->json_output) { - switch (config->aggr_mode) { - case AGGR_NODE: - case AGGR_SOCKET: - case AGGR_DIE: - case AGGR_CORE: - fprintf(output, "#%*s %-*s cpus", - INTERVAL_LEN - 1, "time", - aggr_header_lens[config->aggr_mode], - aggr_header_std[config->aggr_mode]); - break; - case AGGR_NONE: - fprintf(output, "#%*s %-*s", - INTERVAL_LEN - 1, "time", - aggr_header_lens[config->aggr_mode], - aggr_header_std[config->aggr_mode]); - break; - case AGGR_THREAD: - fprintf(output, "#%*s %*s-%-*s", - INTERVAL_LEN - 1, "time", - COMM_LEN, "comm", PID_LEN, "pid"); - break; - case AGGR_GLOBAL: - default: - if (!config->iostat_run) - fprintf(output, "#%*s", - INTERVAL_LEN - 1, "time"); - case AGGR_UNSET: - case AGGR_MAX: - break; - } + if (config->metric_only) + print_metric_headers(config, evlist, " ", true); + else + fprintf(output, " %*s %*s events\n", + COUNTS_LEN, "counts", config->unit_width, "unit"); +} - if (!metric_only) { - fprintf(output, " %*s %*s events\n", - COUNTS_LEN, "counts", unit_width, "unit"); - } - } +static void print_header_std(struct perf_stat_config *config, + struct target *_target, struct evlist *evlist, + int argc, const char **argv) +{ + FILE *output = config->output; + int i; + + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (_target->bpf_str) + fprintf(output, "\'BPF program(s) %s", _target->bpf_str); + else if (_target->system_wide) + fprintf(output, "\'system wide"); + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { + fprintf(output, "\'%s", argv ? argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) + fprintf(output, " %s", argv[i]); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); + else + fprintf(output, "thread id \'%s", _target->tid); - if ((num_print_interval == 0 || config->interval_clear) && metric_only) + fprintf(output, "\'"); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); + fprintf(output, ":\n\n"); + + if (config->metric_only) + print_metric_headers(config, evlist, " ", false); +} + +static void print_header_csv(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + if (config->metric_only) + print_metric_headers(config, evlist, " ", true); +} +static void print_header_json(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + if (config->metric_only) print_metric_headers(config, evlist, " ", true); - if (++num_print_interval == 25) - num_print_interval = 0; } static void print_header(struct perf_stat_config *config, struct target *_target, + struct evlist *evlist, int argc, const char **argv) { - FILE *output = config->output; - int i; + static int num_print_iv; fflush(stdout); - if (!config->csv_output && !config->json_output) { - fprintf(output, "\n"); - fprintf(output, " Performance counter stats for "); - if (_target->bpf_str) - fprintf(output, "\'BPF program(s) %s", _target->bpf_str); - else if (_target->system_wide) - fprintf(output, "\'system wide"); - else if (_target->cpu_list) - fprintf(output, "\'CPU(s) %s", _target->cpu_list); - else if (!target__has_task(_target)) { - fprintf(output, "\'%s", argv ? argv[0] : "pipe"); - for (i = 1; argv && (i < argc); i++) - fprintf(output, " %s", argv[i]); - } else if (_target->pid) - fprintf(output, "process id \'%s", _target->pid); - else - fprintf(output, "thread id \'%s", _target->tid); + if (config->interval_clear) + puts(CONSOLE_CLEAR); - fprintf(output, "\'"); - if (config->run_count > 1) - fprintf(output, " (%d runs)", config->run_count); - fprintf(output, ":\n\n"); + if (num_print_iv == 0 || config->interval_clear) { + if (config->json_output) + print_header_json(config, _target, evlist, argc, argv); + else if (config->csv_output) + print_header_csv(config, _target, evlist, argc, argv); + else if (config->interval) + print_header_interval_std(config, _target, evlist, argc, argv); + else + print_header_std(config, _target, evlist, argc, argv); } + + if (num_print_iv++ == 25) + num_print_iv = 0; } static int get_precision(double num) @@ -1278,18 +1313,10 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (interval) { prefix = buf; prepare_interval(config, prefix, ts); - print_interval(config, evlist); - } else { - print_header(config, _target, argc, argv); } + print_header(config, _target, evlist, argc, argv); if (metric_only) { - static int num_print_iv; - - if (num_print_iv == 0 && !interval) - print_metric_headers(config, evlist, prefix, false); - if (num_print_iv++ == 25) - num_print_iv = 0; if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) fprintf(config->output, "%s", prefix); } -- cgit v1.2.3 From 453279d5739f12cf27b604c84c061a8a7a3937da Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:23 -0800 Subject: perf stat: Move condition to print_footer() Likewise, I think it'd better to have the control inside the function, and keep the higher level function clearer. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-16-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f97817628478..73cf898060c0 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1205,6 +1205,9 @@ static void print_footer(struct perf_stat_config *config) double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; + if (config->interval || config->csv_output || config->json_output) + return; + if (!config->null_run) fprintf(output, "\n"); @@ -1359,8 +1362,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf break; } - if (!interval && !config->csv_output && !config->json_output) - print_footer(config); + print_footer(config); fflush(config->output); } -- cgit v1.2.3 From 2cf38236d94605586da0ed0f9ec432bf0eb415c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:24 -0800 Subject: perf stat: Factor out prefix display The prefix is needed for interval mode to print timestamp at the beginning of each line. But the it's tricky for the metric only mode since it doesn't print every evsel and combines the metrics into a single line. So it needed to pass 'first' argument to print_counter_aggrdata() to determine if the current event is being printed at first. This makes the code hard to read. Let's move the logic out of the function and do it in the outer print loop. This would enable further cleanups later. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-17-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 43 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 73cf898060c0..bb40ed29300d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -805,8 +805,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix, bool metric_only, - bool *first) + char *prefix, bool metric_only) { FILE *output = config->output; u64 ena, run, val; @@ -825,10 +824,6 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - if (*first && metric_only) { - *first = false; - aggr_printout(config, counter, id, aggr->nr); - } if (prefix && !metric_only) fprintf(output, "%s", prefix); @@ -849,7 +844,6 @@ static void print_aggr(struct perf_stat_config *config, FILE *output = config->output; struct evsel *counter; int s; - bool first; if (!config->aggr_map || !config->aggr_get_id) return; @@ -860,21 +854,23 @@ static void print_aggr(struct perf_stat_config *config, */ for (s = 0; s < config->aggr_map->nr; s++) { if (metric_only) { + struct perf_stat_aggr *aggr; + struct aggr_cpu_id id = config->aggr_map->map[s]; + if (prefix) fprintf(output, "%s", prefix); - else if (config->summary && !config->no_csv_summary && - config->csv_output && !config->interval) - fprintf(output, "%16s%s", "summary", config->csv_sep); + + counter = evlist__first(evlist); + aggr = &counter->stats->aggr[s]; + aggr_printout(config, counter, id, aggr->nr); } - first = true; evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first); + print_counter_aggrdata(config, counter, s, prefix, + metric_only); } if (metric_only) fputc('\n', output); @@ -885,7 +881,6 @@ static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { bool metric_only = config->metric_only; - bool first = false; int s; /* AGGR_THREAD doesn't have config->aggr_get_id */ @@ -896,9 +891,8 @@ static void print_counter(struct perf_stat_config *config, return; for (s = 0; s < config->aggr_map->nr; s++) { - print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first); + print_counter_aggrdata(config, counter, s, prefix, + metric_only); } } @@ -1260,7 +1254,6 @@ static void print_percore(struct perf_stat_config *config, FILE *output = config->output; struct cpu_aggr_map *core_map; int s, c, i; - bool first = true; if (!config->aggr_map || !config->aggr_get_id) return; @@ -1288,11 +1281,7 @@ static void print_percore(struct perf_stat_config *config, if (found) continue; - if (prefix && metric_only) - fprintf(output, "%s", prefix); - - print_counter_aggrdata(config, counter, s, - prefix, metric_only, &first); + print_counter_aggrdata(config, counter, s, prefix, metric_only); core_map->map[c++] = core_id; } @@ -1319,10 +1308,6 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf } print_header(config, _target, evlist, argc, argv); - if (metric_only) { - if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) - fprintf(config->output, "%s", prefix); - } switch (config->aggr_mode) { case AGGR_CORE: @@ -1337,6 +1322,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf iostat_print_counters(evlist, config, ts, prefix = buf, print_counter); else { + if (prefix && metric_only) + fprintf(config->output, "%s", prefix); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, prefix); } -- cgit v1.2.3 From 78670daefd33c8e0a30b121a2a271a90bca57130 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:25 -0800 Subject: perf stat: Factor out print_metric_{begin,end}() For the metric-only case, add new functions to handle the start and the end of each metric display. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-18-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 56 +++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bb40ed29300d..7a0673be720b 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -836,12 +836,39 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fputc('\n', output); } +static void print_metric_begin(struct perf_stat_config *config, + struct evlist *evlist, + char *prefix, int aggr_idx) +{ + struct perf_stat_aggr *aggr; + struct aggr_cpu_id id; + struct evsel *evsel; + + if (!config->metric_only) + return; + + if (prefix) + fprintf(config->output, "%s", prefix); + + evsel = evlist__first(evlist); + id = config->aggr_map->map[aggr_idx]; + aggr = &evsel->stats->aggr[aggr_idx]; + aggr_printout(config, evsel, id, aggr->nr); +} + +static void print_metric_end(struct perf_stat_config *config) +{ + if (!config->metric_only) + return; + + fputc('\n', config->output); +} + static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { bool metric_only = config->metric_only; - FILE *output = config->output; struct evsel *counter; int s; @@ -853,17 +880,7 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - if (metric_only) { - struct perf_stat_aggr *aggr; - struct aggr_cpu_id id = config->aggr_map->map[s]; - - if (prefix) - fprintf(output, "%s", prefix); - - counter = evlist__first(evlist); - aggr = &counter->stats->aggr[s]; - aggr_printout(config, counter, id, aggr->nr); - } + print_metric_begin(config, evlist, prefix, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) @@ -872,8 +889,7 @@ static void print_aggr(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, prefix, metric_only); } - if (metric_only) - fputc('\n', output); + print_metric_end(config); } } @@ -919,9 +935,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - if (prefix) - fputs(prefix, config->output); - aggr_printout(config, counter, id, 0); + print_metric_begin(config, evlist, prefix, counter_idx); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -933,7 +947,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run, ena, 1.0, &rt_stat, counter_idx); } if (!first) - fputc('\n', config->output); + print_metric_end(config); } } @@ -1322,13 +1336,11 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf iostat_print_counters(evlist, config, ts, prefix = buf, print_counter); else { - if (prefix && metric_only) - fprintf(config->output, "%s", prefix); + print_metric_begin(config, evlist, prefix, /*aggr_idx=*/0); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, prefix); } - if (metric_only) - fputc('\n', config->output); + print_metric_end(config); } break; case AGGR_NONE: -- cgit v1.2.3 From 67f8b7eb4e9195d037b60f1614880234aca15fee Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:26 -0800 Subject: perf stat: Support --for-each-cgroup and --metric-only When we have events for each cgroup, the metric should be printed for each cgroup separately. Add print_cgroup_counter() to handle that situation properly. Also change print_metric_headers() not to print duplicate headers by checking cgroups. $ perf stat -a --for-each-cgroup system.slice,user.slice --metric-only sleep 1 Performance counter stats for 'system wide': GHz insn per cycle branch-misses of all branches system.slice 3.792 0.61 3.24% user.slice 3.661 2.32 0.37% 1.016111516 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-19-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 58 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7a0673be720b..cf25ed99b5df 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -168,10 +168,10 @@ static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_ fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); } -static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) +static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) { - if (nr_cgroups) { - const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name : ""; + if (nr_cgroups || config->cgroup_list) { + const char *cgrp_name = cgrp ? cgrp->name : ""; if (config->json_output) print_cgroup_json(config, cgrp_name); @@ -340,6 +340,7 @@ struct outstate { int nr; struct aggr_cpu_id id; struct evsel *evsel; + struct cgroup *cgrp; }; static void new_line_std(struct perf_stat_config *config __maybe_unused, @@ -552,6 +553,9 @@ static void print_metric_header(struct perf_stat_config *config, os->evsel->priv != os->evsel->evlist->selected->priv) return; + if (os->evsel->cgrp != os->cgrp) + return; + if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); @@ -642,7 +646,7 @@ static void abs_printout(struct perf_stat_config *config, { aggr_printout(config, evsel, id, nr); print_counter_value(config, evsel, avg, ok); - print_cgroup(config, evsel); + print_cgroup(config, evsel->cgrp); } static bool is_mixed_hw_group(struct evsel *counter) @@ -838,7 +842,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, static void print_metric_begin(struct perf_stat_config *config, struct evlist *evlist, - char *prefix, int aggr_idx) + char *prefix, int aggr_idx, + struct cgroup *cgrp) { struct perf_stat_aggr *aggr; struct aggr_cpu_id id; @@ -854,6 +859,8 @@ static void print_metric_begin(struct perf_stat_config *config, id = config->aggr_map->map[aggr_idx]; aggr = &evsel->stats->aggr[aggr_idx]; aggr_printout(config, evsel, id, aggr->nr); + + print_cgroup(config, cgrp); } static void print_metric_end(struct perf_stat_config *config) @@ -880,7 +887,7 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, prefix, s); + print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) @@ -935,7 +942,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - print_metric_begin(config, evlist, prefix, counter_idx); + print_metric_begin(config, evlist, prefix, + counter_idx, /*cgrp=*/NULL); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -960,7 +968,7 @@ static void print_metric_headers_std(struct perf_stat_config *config, if (!no_indent) { int len = aggr_header_lens[config->aggr_mode]; - if (nr_cgroups) + if (nr_cgroups || config->cgroup_list) len += CGROUP_LEN + 1; fprintf(config->output, "%*s", len, ""); @@ -1012,6 +1020,9 @@ static void print_metric_headers(struct perf_stat_config *config, if (config->iostat_run) iostat_print_header_prefix(config); + if (config->cgroup_list) + os.cgrp = evlist__first(evlist)->cgrp; + /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { os.evsel = counter; @@ -1305,6 +1316,28 @@ static void print_percore(struct perf_stat_config *config, fputc('\n', output); } +static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, + char *prefix) +{ + struct cgroup *cgrp = NULL; + struct evsel *counter; + + evlist__for_each_entry(evlist, counter) { + if (cgrp != counter->cgrp) { + if (cgrp != NULL) + print_metric_end(config); + + cgrp = counter->cgrp; + print_metric_begin(config, evlist, prefix, + /*aggr_idx=*/0, cgrp); + } + + print_counter(config, counter, prefix); + } + if (cgrp) + print_metric_end(config); +} + void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) { @@ -1332,11 +1365,14 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf break; case AGGR_THREAD: case AGGR_GLOBAL: - if (config->iostat_run) + if (config->iostat_run) { iostat_print_counters(evlist, config, ts, prefix = buf, print_counter); - else { - print_metric_begin(config, evlist, prefix, /*aggr_idx=*/0); + } else if (config->cgroup_list) { + print_cgroup_counter(config, evlist, prefix); + } else { + print_metric_begin(config, evlist, prefix, + /*aggr_idx=*/0, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, prefix); } -- cgit v1.2.3 From 4dd7ff4a0311eee3ac946f0824442de94b34c42e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Nov 2022 15:02:27 -0800 Subject: perf stat: Add print_aggr_cgroup() for --for-each-cgroup and --topdown Normally, --for-each-cgroup only works with AGGR_GLOBAL. However the --topdown on some cpu (e.g. Intel Skylake) converts it to the AGGR_CORE internally. To support those machines, add print_aggr_cgroup and handle the events like in print_cgroup_events(). $ perf stat -a --for-each-cgroup system.slice,user.slice --topdown sleep 1 nmi_watchdog enabled with topdown. May give wrong results. Disable with echo 0 > /proc/sys/kernel/nmi_watchdog Performance counter stats for 'system wide': retiring bad speculation frontend bound backend bound S0-D0-C0 2 system.slice 49.0% -46.6% 31.4% S0-D0-C1 2 system.slice 55.5% 8.0% 45.5% -9.0% S0-D0-C2 2 system.slice 87.8% 22.1% 30.3% -40.3% S0-D0-C3 2 system.slice 53.3% -11.9% 45.2% 13.4% S0-D0-C0 2 user.slice 123.5% 4.0% 48.5% -75.9% S0-D0-C1 2 user.slice 19.9% 6.5% 89.9% -16.3% S0-D0-C2 2 user.slice 29.9% 7.9% 71.3% -9.1 S0-D0-C3 2 user.slice 28.0% 7.2% 43.3% 21.5% 1.004136937 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221114230227.1255976-20-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cf25ed99b5df..f5501760ff2e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -900,6 +900,42 @@ static void print_aggr(struct perf_stat_config *config, } } +static void print_aggr_cgroup(struct perf_stat_config *config, + struct evlist *evlist, + char *prefix) +{ + bool metric_only = config->metric_only; + struct evsel *counter, *evsel; + struct cgroup *cgrp = NULL; + int s; + + if (!config->aggr_map || !config->aggr_get_id) + return; + + evlist__for_each_entry(evlist, evsel) { + if (cgrp == evsel->cgrp) + continue; + + cgrp = evsel->cgrp; + + for (s = 0; s < config->aggr_map->nr; s++) { + print_metric_begin(config, evlist, prefix, s, cgrp); + + evlist__for_each_entry(evlist, counter) { + if (counter->merged_stat) + continue; + + if (counter->cgrp != cgrp) + continue; + + print_counter_aggrdata(config, counter, s, prefix, + metric_only); + } + print_metric_end(config); + } + } +} + static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1361,7 +1397,10 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_DIE: case AGGR_SOCKET: case AGGR_NODE: - print_aggr(config, evlist, prefix); + if (config->cgroup_list) + print_aggr_cgroup(config, evlist, prefix); + else + print_aggr(config, evlist, prefix); break; case AGGR_THREAD: case AGGR_GLOBAL: -- cgit v1.2.3 From a6efaa2c89bf35c3cb7f21dee221dabf20dccf59 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:01 -0800 Subject: tools lib api: Add install target This allows libapi to be installed as a dependency. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cmc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Cc: nicolas schier Link: http://lore.kernel.org/lkml/20221109184914.1357295-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index e21e1b40b525..6629d0fd0130 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -15,6 +15,16 @@ LD ?= $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) @@ -45,10 +55,23 @@ RM = rm -f API_IN := $(OUTPUT)libapi-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include all: fixdep $(LIBFILE) @@ -58,6 +81,32 @@ $(API_IN): FORCE $(LIBFILE): $(API_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,cpu.h,$(prefix)/include/api,644); \ + $(call do_install,debug.h,$(prefix)/include/api,644); \ + $(call do_install,io.h,$(prefix)/include/api,644); + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) -- cgit v1.2.3 From 630ae80ea1dd253609cb50cff87f3248f901aca3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:02 -0800 Subject: tools lib subcmd: Add install target This allows libsubcmd to be installed as a dependency. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 8f1a09cdfd17..e96566f8991c 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -17,6 +17,15 @@ RM = rm -f MAKEFLAGS += --no-print-directory +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC @@ -48,6 +57,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) SUBCMD_IN := $(OUTPUT)libsubcmd-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V @@ -61,6 +82,34 @@ $(SUBCMD_IN): FORCE $(LIBFILE): $(SUBCMD_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ + $(call do_install,help.h,$(prefix)/include/subcmd,644); \ + $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ + $(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \ + $(call do_install,run-command.h,$(prefix)/include/subcmd,644); + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) -- cgit v1.2.3 From 911920b06e6be3faf87450affb0dbbc6117fd0a5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:03 -0800 Subject: perf build: Install libsubcmd locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libsubcmd build mirroring the libbpf build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index fd7a6ff9e7aa..413ca7e4a02a 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -40,6 +40,7 @@ pmu-events/pmu-events.c pmu-events/jevents feature/ libbpf/ +libsubcmd/ fixdep libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 67819f905611..4ec6b95a1c49 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -244,7 +244,7 @@ else # force_fixdep LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBBPF_DIR = $(srctree)/tools/lib/bpf/ -SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ +LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -294,7 +294,6 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) - SUBCMD_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) ifneq ($(subdir),) API_PATH=$(OUTPUT)/../lib/api/ @@ -305,7 +304,6 @@ else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ API_PATH=$(LIB_DIR) - SUBCMD_PATH=$(SUBCMD_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -332,7 +330,14 @@ LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a -LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a +ifneq ($(OUTPUT),) + LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd +else + LIBSUBCMD_OUTPUT = $(CURDIR)/libsubcmd +endif +LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) +LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include +LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a LIBPERF = $(LIBPERF_PATH)libperf.a export LIBPERF @@ -849,11 +854,14 @@ $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBSUBCMD): FORCE - $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a +$(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) + $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ + DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \ + $@ install_headers $(LIBSUBCMD)-clean: - $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean + $(call QUIET_CLEAN, libsubcmd) + $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT) help: @echo 'Perf make targets:' @@ -1039,7 +1047,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): +$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ ifdef BUILD_BPF_SKEL -- cgit v1.2.3 From 00314c9bca8faad495e4e4f702491ecd37444784 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:04 -0800 Subject: perf build: Install libapi locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libapi build mirroring the libbpf and libsubcmd build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 413ca7e4a02a..4a8a76da861d 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,6 +39,7 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents feature/ +libapi/ libbpf/ libsubcmd/ fixdep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4ec6b95a1c49..f8c712e26a69 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -241,7 +241,7 @@ sub-make: fixdep else # force_fixdep -LIB_DIR = $(srctree)/tools/lib/api/ +LIBAPI_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ @@ -295,15 +295,9 @@ ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) -ifneq ($(subdir),) - API_PATH=$(OUTPUT)/../lib/api/ -else - API_PATH=$(OUTPUT) -endif else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ - API_PATH=$(LIB_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -318,7 +312,14 @@ LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) -LIBAPI = $(API_PATH)libapi.a +ifneq ($(OUTPUT),) + LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi +else + LIBAPI_OUTPUT = $(CURDIR)/libapi +endif +LIBAPI_DESTDIR = $(LIBAPI_OUTPUT) +LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include +LIBAPI = $(LIBAPI_OUTPUT)/libapi.a export LIBAPI ifneq ($(OUTPUT),) @@ -831,12 +832,14 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -$(LIBAPI): FORCE - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a +$(LIBAPI): FORCE | $(LIBAPI_OUTPUT) + $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ + DESTDIR=$(LIBAPI_DESTDIR) prefix= \ + $@ install_headers $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBAPI_OUTPUT) $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ @@ -1047,7 +1050,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ ifdef BUILD_BPF_SKEL -- cgit v1.2.3 From 91009a3a9913f2755f8b9616a2124a51147c375c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:05 -0800 Subject: perf build: Install libperf locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libperf build mirroring the libbpf, libsubcmd and libapi build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 4a8a76da861d..b5f10d5603f1 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -41,6 +41,7 @@ pmu-events/jevents feature/ libapi/ libbpf/ +libperf/ libsubcmd/ fixdep libtraceevent-dynamic-list diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f8c712e26a69..57aec5a97270 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -294,11 +294,9 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) - LIBPERF_PATH=$(OUTPUT) else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ - LIBPERF_PATH=$(LIBPERF_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -340,7 +338,14 @@ LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a -LIBPERF = $(LIBPERF_PATH)libperf.a +ifneq ($(OUTPUT),) + LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf +else + LIBPERF_OUTPUT = $(CURDIR)/libperf +endif +LIBPERF_DESTDIR = $(LIBPERF_OUTPUT) +LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include +LIBPERF = $(LIBPERF_OUTPUT)/libperf.a export LIBPERF # python extension build directories @@ -850,12 +855,14 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) -$(LIBPERF): FORCE - $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a +$(LIBPERF): FORCE | $(LIBPERF_OUTPUT) + $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \ + DESTDIR=$(LIBPERF_DESTDIR) prefix= \ + $@ install_headers $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) - $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBPERF_OUTPUT) $(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ @@ -1050,7 +1057,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ ifdef BUILD_BPF_SKEL -- cgit v1.2.3 From ef019df01e207971200ffcb06559f791980668a1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:06 -0800 Subject: perf build: Install libtraceevent locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf headers are all found via this path, making it impossible to override include behavior. Change the libtraceevent build mirroring the libbpf, libsubcmd, libapi and libperf build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Similarly, the plugins are now installed into libtraceevent_plugins except they have no header files. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 3 ++- tools/perf/Makefile.perf | 57 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index b5f10d5603f1..bc165f4e4830 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -43,6 +43,7 @@ libapi/ libbpf/ libperf/ libsubcmd/ +libtraceevent/ +libtraceevent_plugins/ fixdep -libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 57aec5a97270..6e5e3f7730b5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -242,7 +242,8 @@ sub-make: fixdep else # force_fixdep LIBAPI_DIR = $(srctree)/tools/lib/api/ -TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ +LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ +LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ @@ -292,16 +293,17 @@ grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) - TE_PATH=$(OUTPUT) - PLUGINS_PATH=$(OUTPUT) + LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent else - TE_PATH=$(TRACE_EVENT_DIR) - PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ + LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent endif - -LIBTRACEEVENT = $(TE_PATH)libtraceevent.a +LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins +LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT) +LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT) +LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include +LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list # # The static build has no dynsym table, so this does not work for @@ -821,21 +823,33 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' -$(LIBTRACEEVENT): FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a - -libtraceevent_plugins: FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins - -$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list +$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) + $(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \ + $@ install_headers $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT) + +libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT) + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + plugins + +libtraceevent_plugins-clean: + $(call QUIET_CLEAN, libtraceevent_plugins) + $(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT) + +$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + $(LIBTRACEEVENT_FLAGS) $@ install-traceevent-plugins: libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + $(LIBTRACEEVENT_FLAGS) install $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ @@ -1060,6 +1074,11 @@ SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT): $(Q)$(MKDIR) -p $@ +ifndef LIBTRACEEVENT_DYNAMIC +$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT): + $(Q)$(MKDIR) -p $@ +endif + ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) @@ -1102,7 +1121,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected -- cgit v1.2.3 From 8d1f68bd76a6517c132fd1dd223c85c5e1defa49 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:07 -0800 Subject: tools lib api: Add missing install headers Headers necessary for the perf build. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 6629d0fd0130..3e5ef1e0e890 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -103,7 +103,10 @@ install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,cpu.h,$(prefix)/include/api,644); \ $(call do_install,debug.h,$(prefix)/include/api,644); \ - $(call do_install,io.h,$(prefix)/include/api,644); + $(call do_install,io.h,$(prefix)/include/api,644); \ + $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ + $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); + $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); install: install_lib install_headers -- cgit v1.2.3 From a6e8caf5db2e1db8e2ec51f87b9749bd28f6b969 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:08 -0800 Subject: tools lib perf: Add missing install headers Headers necessary for the perf build. Note, internal headers are also installed as these are necessary for the build. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 21df023a2103..1badc0a04676 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -189,13 +189,21 @@ install_lib: libs install_headers: $(call QUIET_INSTALL, headers) \ + $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \ + $(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ -- cgit v1.2.3 From 160be157eaba2a37233ff2d27093e5915b6b084e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:09 -0800 Subject: tool lib symbol: Add Makefile/Build Add sufficient Makefile for libsymbol to be built as a dependency and header files installed. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/Build | 1 + tools/lib/symbol/Makefile | 115 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 tools/lib/symbol/Build create mode 100644 tools/lib/symbol/Makefile diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build new file mode 100644 index 000000000000..9b9a9c78d3c9 --- /dev/null +++ b/tools/lib/symbol/Build @@ -0,0 +1 @@ +libsymbol-y += kallsyms.o diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile new file mode 100644 index 000000000000..4c1d6b53032d --- /dev/null +++ b/tools/lib/symbol/Makefile @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak # QUIET_CLEAN + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld + +MAKEFLAGS += --no-print-directory + +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LIBFILE = $(OUTPUT)libsymbol.a + +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC + +ifeq ($(DEBUG),0) +ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -O3 +else + CFLAGS += -O6 +endif +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif + +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 + +CFLAGS += -I$(srctree)/tools/lib +CFLAGS += -I$(srctree)/tools/include + +RM = rm -f + +SYMBOL_IN := $(OUTPUT)libsymbol-in.o + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include + +all: fixdep $(LIBFILE) + +$(SYMBOL_IN): FORCE + $(MAKE) $(build)=libsymbol V=1 + +$(LIBFILE): $(SYMBOL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN) + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); + +install: install_lib install_headers + +clean: + $(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \ + find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: + +.PHONY: clean FORCE -- cgit v1.2.3 From 84bec6f0b31fb2ac8c5e4b0ded7727e6ec1115db Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:10 -0800 Subject: perf build: Install libsymbol locally when building The perf build currently has a '-Itools/lib' on the CC command line. This causes issues as the libapi, libsubcmd, libtraceevent, libbpf and libsymbol headers are all found via this path, making it impossible to override include behavior. Change the libsymbol build mirroring the libbpf, libsubcmd, libapi, libperf and libtraceevent build, so that it is installed in a directory along with its headers. A later change will modify the include behavior. Don't build kallsyms.o as part of util as this will lead to duplicate definitions. Add kallsym's directory to the MANIFEST rather than individual files, so that the Build and Makefile are added to a source tar ball. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/MANIFEST | 3 +-- tools/perf/Makefile.perf | 25 ++++++++++++++++++++++--- tools/perf/util/Build | 5 ----- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index bc165f4e4830..05806ecfc33c 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -43,6 +43,7 @@ libapi/ libbpf/ libperf/ libsubcmd/ +libsymbol/ libtraceevent/ libtraceevent_plugins/ fixdep diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f5d72f936a6b..c8e8e05b4ff1 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -13,8 +13,7 @@ tools/lib/ctype.c tools/lib/hweight.c tools/lib/rbtree.c tools/lib/string.c -tools/lib/symbol/kallsyms.c -tools/lib/symbol/kallsyms.h +tools/lib/symbol tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/list_sort.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6e5e3f7730b5..6c1a2a3ccc38 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -246,6 +246,7 @@ LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ +LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -340,6 +341,15 @@ LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a +ifneq ($(OUTPUT),) + LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol +else + LIBSYMBOL_OUTPUT = $(CURDIR)/libsymbol +endif +LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT) +LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include +LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a + ifneq ($(OUTPUT),) LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf else @@ -398,7 +408,7 @@ endif export PERL_PATH -PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF) +PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) ifndef NO_LIBBPF ifndef LIBBPF_DYNAMIC PERFLIBS += $(LIBBPF) @@ -887,6 +897,15 @@ $(LIBSUBCMD)-clean: $(call QUIET_CLEAN, libsubcmd) $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT) +$(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT) + $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \ + DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \ + $@ install_headers + +$(LIBSYMBOL)-clean: + $(call QUIET_CLEAN, libsymbol) + $(Q)$(RM) -r -- $(LIBSYMBOL_OUTPUT) + help: @echo 'Perf make targets:' @echo ' doc - make *all* documentation (see below)' @@ -1071,7 +1090,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ ifndef LIBTRACEEVENT_DYNAMIC @@ -1121,7 +1140,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 47a7db3ad0a1..ab37f588ee8b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -19,7 +19,6 @@ perf-y += perf_event_attr_fprintf.o perf-y += evswitch.o perf-y += find_bit.o perf-y += get_current_dir_name.o -perf-y += kallsyms.o perf-y += levenshtein.o perf-y += llvm-utils.o perf-y += mmap.o @@ -294,10 +293,6 @@ CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE - $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) - $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -- cgit v1.2.3 From bd560973c5d3b2a37fb13d769b34385ed320d547 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:11 -0800 Subject: perf expr: Tidy hashmap dependency hashmap.h comes from libbpf but isn't installed with its headers. Always use the header file of the code in util. Change the hashmap.h dependency in expr.h to a forward declaration, add the necessary header file includes in the C files. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 1 + tools/perf/tests/pmu-events.c | 1 + tools/perf/util/bpf-loader.c | 4 ---- tools/perf/util/evsel.c | 4 ---- tools/perf/util/expr.c | 1 + tools/perf/util/expr.h | 7 +------ tools/perf/util/metricgroup.c | 1 + tools/perf/util/stat-shadow.c | 1 + tools/perf/util/stat.c | 4 ---- 9 files changed, 6 insertions(+), 18 deletions(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 6512f5e22045..b6667501ebb4 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -2,6 +2,7 @@ #include "util/cputopo.h" #include "util/debug.h" #include "util/expr.h" +#include "util/hashmap.h" #include "util/header.h" #include "util/smt.h" #include "tests.h" diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 5d0d3b239a68..f7b9dbbad97f 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -12,6 +12,7 @@ #include #include "util/evlist.h" #include "util/expr.h" +#include "util/hashmap.h" #include "util/parse-events.h" #include "metricgroup.h" #include "stat.h" diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f4adeccdbbcb..b3c8174360bf 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -27,11 +27,7 @@ #include "util.h" #include "llvm-utils.h" #include "c++/clang-c.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else #include "util/hashmap.h" -#endif #include "asm/bug.h" #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ca6abb64c91d..ca554f8202f9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,11 +46,7 @@ #include "string2.h" #include "memswap.h" #include "util.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else #include "util/hashmap.h" -#endif #include "pmu-hybrid.h" #include "off_cpu.h" #include "../perf-sys.h" diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index aaacf514dc09..140f2acdb325 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -11,6 +11,7 @@ #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" +#include "util/hashmap.h" #include "smt.h" #include "tsc.h" #include diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index d6c1668dc1a0..029271540fb0 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,12 +2,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else -#include "util/hashmap.h" -#endif - +struct hashmap; struct metric_ref; struct expr_scanner_ctx { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 1943fed9b6d9..cf9e2452d322 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -28,6 +28,7 @@ #include "util.h" #include #include "cgroup.h" +#include "util/hashmap.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07b29fe272c7..9bde9224a97c 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -14,6 +14,7 @@ #include "units.h" #include #include "iostat.h" +#include "util/hashmap.h" /* * AGGR_GLOBAL: Use CPU 0 diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index acf0edf5fdd1..673f017a211f 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -14,11 +14,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include -#else #include "util/hashmap.h" -#endif #include void update_stats(struct stats *stats, u64 val) -- cgit v1.2.3 From fd3f518fc1140622e752ac51d0ff18bb74f1d180 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:12 -0800 Subject: perf thread_map: Reduce exposure of libperf internal API Remove unnecessary include of internal threadmap.h and refcount.h in thread_map.h. Switch to using public APIs when possible or including the internal header file in the C file. Fix a transitive dependency in openat-syscall.c broken by the clean up. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/builtin-trace.c | 4 ++-- tools/perf/tests/openat-syscall.c | 1 + tools/perf/tests/thread-map.c | 1 + tools/perf/util/bpf_counter.c | 2 +- tools/perf/util/evsel.c | 1 + tools/perf/util/python.c | 6 +++--- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/thread_map.c | 1 + tools/perf/util/thread_map.h | 2 -- 10 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1d79801f4e84..d040fbcdcc5a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -93,6 +93,7 @@ #include #include +#include #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c93b359abc31..3257da5cad23 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4095,8 +4095,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || - evlist->core.threads->nr > 1 || - evlist__first(evlist)->core.attr.inherit; + perf_thread_map__nr(evlist->core.threads) > 1 || + evlist__first(evlist)->core.attr.inherit; /* * Now that we already used evsel->core.attr to ask the kernel to setup the diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 7e05b8b5cc95..131b62271bfa 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "thread_map.h" #include "evsel.h" #include "debug.h" diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index e413c1387fcb..74308c1368fe 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -11,6 +11,7 @@ #include "util/synthetic-events.h" #include #include +#include struct perf_sample; struct perf_tool; diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ef1c15e4aeba..eeee899fcf34 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -561,7 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) - key = evsel->core.threads->map[i].pid; + key = perf_thread_map__pid(evsel->core.threads, i); else if (filter_type == BPERF_FILTER_CPU) key = evsel->core.cpus->map[i].cpu; else diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ca554f8202f9..45f4f08399ae 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -53,6 +53,7 @@ #include "util/parse-branch-options.h" #include #include +#include #include diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 5be5fa2391de..b5941c74a0d6 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -718,17 +718,17 @@ static Py_ssize_t pyrf_thread_map__length(PyObject *obj) { struct pyrf_thread_map *pthreads = (void *)obj; - return pthreads->threads->nr; + return perf_thread_map__nr(pthreads->threads); } static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_thread_map *pthreads = (void *)obj; - if (i >= pthreads->threads->nr) + if (i >= perf_thread_map__nr(pthreads->threads)) return NULL; - return Py_BuildValue("i", pthreads->threads->map[i]); + return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i)); } static PySequenceMethods pyrf_thread_map__sequence_methods = { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1985d1a42a22..1cf65db8f861 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1654,7 +1654,7 @@ static void python_process_stat(struct perf_stat_config *config, struct perf_cpu_map *cpus = counter->core.cpus; int cpu, thread; - for (thread = 0; thread < threads->nr; thread++) { + for (thread = 0; thread < perf_thread_map__nr(threads); thread++) { for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { process_stat(counter, perf_cpu_map__cpu(cpus, cpu), perf_thread_map__pid(threads, thread), tstamp, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943..e848579e61a8 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -18,6 +18,7 @@ #include "thread_map.h" #include "debug.h" #include "event.h" +#include /* Skip "." and ".." directories */ static int filter(const struct dirent *dir) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3bb860a32b8e..00ec05fc1656 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -4,8 +4,6 @@ #include #include -#include -#include #include struct perf_record_thread_map; -- cgit v1.2.3 From c4d9d95f84d7f4758db76f358b03a0d6809c929b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 9 Nov 2022 10:49:13 -0800 Subject: perf cpumap: Tidy libperf includes Use public API when possible, don't include internal API in header files in evsel.h. Fix any related breakages. Committer note: There was one missing case, when building for arm64: arch/arm64/util/pmu.c: In function 'pmu_events_table__find': arch/arm64/util/pmu.c:18:30: error: invalid use of undefined type 'struct perf_cpu_map' 18 | if (pmu->cpus->nr != cpu__max_cpu().cpu) | ^~ Fix it by adding one more exception, including Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/pmu.c | 1 + tools/perf/tests/cpumap.c | 2 +- tools/perf/util/auxtrace.h | 2 +- tools/perf/util/cpumap.c | 1 + tools/perf/util/cpumap.h | 2 +- tools/perf/util/evsel.h | 2 -- 6 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index f849b1e88d43..477e513972a4 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include "../../../util/cpumap.h" #include "../../../util/pmu.h" diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7c873c6ae3eb..3150fc1fed6f 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -6,7 +6,7 @@ #include "util/synthetic-events.h" #include #include -#include +#include #include "debug.h" struct machine; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 6a0f9b98f059..2cf63d377831 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6e3fcf523de9..5e564974fba4 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -12,6 +12,7 @@ #include #include +#include static struct perf_cpu max_cpu_num; static struct perf_cpu max_present_cpu_num; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index da28b3146ef9..c2f5824a3a22 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -4,8 +4,8 @@ #include #include -#include #include +#include /** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 467bb0b32fef..f3485799ddf9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -10,8 +10,6 @@ #include #include #include "symbol_conf.h" -#include -#include struct bpf_object; struct cgroup; -- cgit v1.2.3 From 746bd29e348f99b44c14cb2b2a57f1d3efb66538 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 15 Nov 2022 23:22:11 -0800 Subject: perf build: Use tools/lib headers from install path Switch -I from tools/lib to the install path for the tools/lib libraries. Add the include_headers build targets to prepare target, as well as pmu-events.c compilation that dependes on libperf. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221109184914.1357295-15-irogers@google.com Link: http://lore.kernel.org/lkml/20221116072211.2837834-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 -- tools/perf/Makefile.perf | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a7f6c0669fae..9cc3c48f3288 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -349,7 +349,6 @@ ifeq ($(DEBUG),0) endif endif -INC_FLAGS += -I$(srctree)/tools/lib/perf/include INC_FLAGS += -I$(src-perf)/util/include INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/ @@ -367,7 +366,6 @@ endif INC_FLAGS += -I$(src-perf)/util INC_FLAGS += -I$(src-perf) -INC_FLAGS += -I$(srctree)/tools/lib/ CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6c1a2a3ccc38..8c0df762fb02 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -305,6 +305,7 @@ LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list +CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include # # The static build has no dynsym table, so this does not work for @@ -322,6 +323,7 @@ LIBAPI_DESTDIR = $(LIBAPI_OUTPUT) LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include LIBAPI = $(LIBAPI_OUTPUT)/libapi.a export LIBAPI +CFLAGS += -I$(LIBAPI_OUTPUT)/include ifneq ($(OUTPUT),) LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf @@ -331,6 +333,7 @@ endif LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a +CFLAGS += -I$(LIBBPF_OUTPUT)/include ifneq ($(OUTPUT),) LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd @@ -340,6 +343,7 @@ endif LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a +CFLAGS += -I$(LIBSUBCMD_OUTPUT)/include ifneq ($(OUTPUT),) LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol @@ -349,6 +353,7 @@ endif LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT) LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a +CFLAGS += -I$(LIBSYMBOL_OUTPUT)/include ifneq ($(OUTPUT),) LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf @@ -359,6 +364,7 @@ LIBPERF_DESTDIR = $(LIBPERF_OUTPUT) LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include LIBPERF = $(LIBPERF_OUTPUT)/libperf.a export LIBPERF +CFLAGS += -I$(LIBPERF_OUTPUT)/include # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -691,14 +697,14 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf -$(PMU_EVENTS_IN): FORCE +$(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): FORCE prepare $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -774,6 +780,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(rename_flags_array) \ $(arch_errno_name_array) \ $(sync_file_range_arrays) \ + $(LIBAPI) \ + $(LIBBPF) \ + $(LIBPERF) \ + $(LIBSUBCMD) \ + $(LIBSYMBOL) \ + $(LIBTRACEEVENT) \ bpf-skel $(OUTPUT)%.o: %.c prepare FORCE -- cgit v1.2.3 From 35fef9b471c70413f8277984920129ddf601f5e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Nov 2022 15:40:46 -0300 Subject: libperf: Remove recursive perf/cpumap.h include from perf/cpumap.h It just hits the header guard, becoming a no-op, ditch it. Acked-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 03aceb72a783..98e463ec15a7 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,7 +3,6 @@ #define __LIBPERF_CPUMAP_H #include -#include #include #include -- cgit v1.2.3 From 63a3bf5e8d9e79ce456c8f73d4395a5a51d841b1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Nov 2022 15:43:34 -0300 Subject: libperf: Add missing 'struct perf_cpu_map' forward declaration to perf/cpumap.h The perf/cpumap.h header is getting the 'struct perf_cpu_map' forward declaration by luck, add it. Acked-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 98e463ec15a7..3f43f770cdac 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -11,6 +11,8 @@ struct perf_cpu { int cpu; }; +struct perf_cpu_map; + LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); -- cgit v1.2.3 From 586cb1d65cc44371115600bc981626725c864029 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:51 -0800 Subject: tools lib api: Clean up install_headers Add missing backslash that caused an install command to always appear in build output. Make the install headers more specific. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 3e5ef1e0e890..3649c7f7ea65 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -100,12 +100,12 @@ install_lib: $(LIBFILE) cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libapi_headers) \ $(call do_install,cpu.h,$(prefix)/include/api,644); \ $(call do_install,debug.h,$(prefix)/include/api,644); \ $(call do_install,io.h,$(prefix)/include/api,644); \ $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ - $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); + $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \ $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); install: install_lib install_headers -- cgit v1.2.3 From daa45f3f3577556801c7b0b2df85eed1289fbcb6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:52 -0800 Subject: tools lib bpf: Avoid install_headers make warning The perf build makes the install_headers target, however, as there is no action for this target a warning is always produced of: make[3]: Nothing to be done for 'install_headers'. Solve this by adding a display of 'INSTALL libbpf_headers'. Signed-off-by: Ian Rogers Acked-by: Andrii Nakryiko Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4c904ef0b47e..7f5f7d2ebe1f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -255,6 +255,7 @@ $(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h $(call do_install,$<,$(prefix)/include/bpf,644) install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) + $(call QUIET_INSTALL, libbpf_headers) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ -- cgit v1.2.3 From 806dda31b856d83d8ec211aa9831bac5f978271e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:53 -0800 Subject: tools lib symbol: Clean up build output Missing @ when building libsymbol. Make the install echo specific to installing the libsymbol headers. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile index 4c1d6b53032d..ea8707b3442a 100644 --- a/tools/lib/symbol/Makefile +++ b/tools/lib/symbol/Makefile @@ -77,7 +77,7 @@ include $(srctree)/tools/scripts/Makefile.include all: fixdep $(LIBFILE) $(SYMBOL_IN): FORCE - $(MAKE) $(build)=libsymbol V=1 + @$(MAKE) $(build)=libsymbol $(LIBFILE): $(SYMBOL_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN) @@ -101,7 +101,7 @@ install_lib: $(LIBFILE) cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libsymbol_headers) \ $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); install: install_lib install_headers -- cgit v1.2.3 From e8951bfb4cb325a6b80310790dc78ac9b4a147eb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:54 -0800 Subject: tools lib perf: Make install_headers clearer Add libperf to the name so that this install_headers build appears different to similar targets in different libraries. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 1badc0a04676..a90fb8c6bed4 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -188,7 +188,7 @@ install_lib: libs cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libperf_headers) \ $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ -- cgit v1.2.3 From 77dce6890a2a715b186bdc149c843571a5bb47df Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:55 -0800 Subject: tools lib subcmd: Make install_headers clearer Add libsubcmd to the name so that this install_headers build appears different to similar targets in different libraries. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index e96566f8991c..9a316d8b89df 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -101,7 +101,7 @@ install_lib: $(LIBFILE) cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, libsubcmd_headers) \ $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ $(call do_install,help.h,$(prefix)/include/subcmd,644); \ $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ -- cgit v1.2.3 From e664f31e21a2d201507704f302ab32f498871b11 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 16:43:56 -0800 Subject: tools lib traceevent: Make install_headers clearer Add libtraceevent to the name so that this install_headers build appears different to similar targets in different libraries. Add ; after kbuffer.h install target for consistency. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Song Liu Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221117004356.279422-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index c874c017c636..98dfd4badea3 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -234,11 +234,11 @@ install_pkgconfig: $(call do_install_pkgconfig_file,$(prefix)) install_headers: - $(call QUIET_INSTALL, headers) \ + $(call QUIET_INSTALL, traceevent_headers) \ $(call do_install,event-parse.h,$(includedir_SQ),644); \ $(call do_install,event-utils.h,$(includedir_SQ),644); \ $(call do_install,trace-seq.h,$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(includedir_SQ),644) + $(call do_install,kbuffer.h,$(includedir_SQ),644); install: install_lib -- cgit v1.2.3 From f215054d749b17c56e014fdca2fcc592dac4529c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:43 -0800 Subject: perf test: Add -w/--workload option The -w/--workload option is to run a simple workload used by testing. This adds a basic framework to run the workloads and 'noploop' workload as an example. $ perf test -w noploop The noploop does a loop doing nothing (NOP) for a second by default. It can have an optional argument to specify the time in seconds. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 2 ++ tools/perf/tests/builtin-test.c | 24 ++++++++++++++++++++++++ tools/perf/tests/tests.h | 22 ++++++++++++++++++++++ tools/perf/tests/workloads/Build | 3 +++ tools/perf/tests/workloads/noploop.c | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+) create mode 100644 tools/perf/tests/workloads/Build create mode 100644 tools/perf/tests/workloads/noploop.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 2064a640facb..11b69023011b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -103,3 +103,5 @@ endif CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls + +perf-y += workloads/ diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 7122eae1d98d..ce641ccfcf81 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -118,6 +118,10 @@ static struct test_suite **tests[] = { arch_tests, }; +static struct test_workload *workloads[] = { + &workload__noploop, +}; + static int num_subtests(const struct test_suite *t) { int num; @@ -475,6 +479,21 @@ static int perf_test__list(int argc, const char **argv) return 0; } +static int run_workload(const char *work, int argc, const char **argv) +{ + unsigned int i = 0; + struct test_workload *twl; + + for (i = 0; i < ARRAY_SIZE(workloads); i++) { + twl = workloads[i]; + if (!strcmp(twl->name, work)) + return twl->func(argc, argv); + } + + pr_info("No workload found: %s\n", work); + return -1; +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -482,12 +501,14 @@ int cmd_test(int argc, const char **argv) NULL, }; const char *skip = NULL; + const char *workload = NULL; const struct option test_options[] = { OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('F', "dont-fork", &dont_fork, "Do not fork for testcase"), + OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; @@ -504,6 +525,9 @@ int cmd_test(int argc, const char **argv) if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); + if (workload) + return run_workload(workload, argc, argv); + symbol_conf.priv_size = sizeof(int); symbol_conf.sort_by_name = true; symbol_conf.try_vmlinux_path = true; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 5bbb8f6a48fc..d315d0d6fc97 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -180,4 +180,26 @@ int test__arch_unwind_sample(struct perf_sample *sample, DECLARE_SUITE(vectors_page); #endif +/* + * Define test workloads to be used in test suites. + */ +typedef int (*workload_fnptr)(int argc, const char **argv); + +struct test_workload { + const char *name; + workload_fnptr func; +}; + +#define DECLARE_WORKLOAD(work) \ + extern struct test_workload workload__##work + +#define DEFINE_WORKLOAD(work) \ +struct test_workload workload__##work = { \ + .name = #work, \ + .func = work, \ +} + +/* The list of test workloads */ +DECLARE_WORKLOAD(noploop); + #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build new file mode 100644 index 000000000000..f98e968d4633 --- /dev/null +++ b/tools/perf/tests/workloads/Build @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +perf-y += noploop.o diff --git a/tools/perf/tests/workloads/noploop.c b/tools/perf/tests/workloads/noploop.c new file mode 100644 index 000000000000..940ea5910a84 --- /dev/null +++ b/tools/perf/tests/workloads/noploop.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include "../tests.h" + +static volatile sig_atomic_t done; + +static void sighandler(int sig __maybe_unused) +{ + done = 1; +} + +static int noploop(int argc, const char **argv) +{ + int sec = 1; + + if (argc > 0) + sec = atoi(argv[0]); + + signal(SIGINT, sighandler); + signal(SIGALRM, sighandler); + alarm(sec); + + while (!done) + continue; + + return 0; +} + +DEFINE_WORKLOAD(noploop); -- cgit v1.2.3 From 24e733b29f13284aac30c4d1fb9f19201951d770 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:44 -0800 Subject: perf test: Replace pipe test workload with noploop So that it can get rid of requirement of a compiler. Also define and use more local symbols to ease future changes. $ sudo ./perf test -v pipe 87: perf pipe recording and injection test : --- start --- test child forked, pid 748003 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 748014 748014 -1 |perf [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 99.83% perf perf [.] noploop [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 99.85% perf perf [.] noploop [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.160 MB /tmp/perf.data.2XYPdw (4007 samples) ] 99.83% perf perf [.] noploop test child finished with 0 ---- end ---- perf pipe recording and injection test: Ok Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/pipe_test.sh | 55 +++++++------------------------------ 1 file changed, 10 insertions(+), 45 deletions(-) diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index 1b32b4f28391..8dd115dd35a7 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -2,68 +2,33 @@ # perf pipe recording and injection test # SPDX-License-Identifier: GPL-2.0 -# skip if there's no compiler -if ! [ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - -file=$(mktemp /tmp/test.file.XXXXXX) data=$(mktemp /tmp/perf.data.XXXXXX) +prog="perf test -w noploop" +task="perf" +sym="noploop" -cat < -#include -#include - -volatile int done; - -void sigalrm(int sig) { - done = 1; -} - -__attribute__((noinline)) void noploop(void) { - while (!done) - continue; -} - -int main(int argc, char *argv[]) { - int sec = 1; - - if (argc > 1) - sec = atoi(argv[1]); - - signal(SIGALRM, sigalrm); - alarm(sec); - - noploop(); - return 0; -} -EOF - - -if ! perf record -e task-clock:u -o - ${file} | perf report -i - --task | grep test.file; then +if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then echo "cannot find the test file in the perf report" exit 1 fi -if ! perf record -e task-clock:u -o - ${file} | perf inject -b | perf report -i - | grep noploop; then +if ! perf record -e task-clock:u -o - ${prog} | perf inject -b | perf report -i - | grep ${sym}; then echo "cannot find noploop function in pipe #1" exit 1 fi -perf record -e task-clock:u -o - ${file} | perf inject -b -o ${data} -if ! perf report -i ${data} | grep noploop; then +perf record -e task-clock:u -o - ${prog} | perf inject -b -o ${data} +if ! perf report -i ${data} | grep ${sym}; then echo "cannot find noploop function in pipe #2" exit 1 fi -perf record -e task-clock:u -o ${data} ${file} -if ! perf inject -b -i ${data} | perf report -i - | grep noploop; then +perf record -e task-clock:u -o ${data} ${prog} +if ! perf inject -b -i ${data} | perf report -i - | grep ${sym}; then echo "cannot find noploop function in pipe #3" exit 1 fi -rm -f ${file} ${data} ${data}.old +rm -f ${data} ${data}.old exit 0 -- cgit v1.2.3 From 69b352927885b17f03d3ee4ee38f580699af107a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:45 -0800 Subject: perf test: Add 'thloop' test workload The thloop is similar to noploop but runs in two threads. This is needed to verify perf record --per-thread to handle multi-threaded programs properly. $ perf test -w thloop It also takes an optional argument to specify runtime in seconds (default: 1). Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 1 + tools/perf/tests/workloads/thloop.c | 53 +++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 tools/perf/tests/workloads/thloop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index ce641ccfcf81..161f38476e77 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -120,6 +120,7 @@ static struct test_suite **tests[] = { static struct test_workload *workloads[] = { &workload__noploop, + &workload__thloop, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index d315d0d6fc97..e6edfeeadaeb 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -201,5 +201,6 @@ struct test_workload workload__##work = { \ /* The list of test workloads */ DECLARE_WORKLOAD(noploop); +DECLARE_WORKLOAD(thloop); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index f98e968d4633..b8964b1099c0 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 perf-y += noploop.o +perf-y += thloop.o diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c new file mode 100644 index 000000000000..29193b75717e --- /dev/null +++ b/tools/perf/tests/workloads/thloop.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include "../tests.h" + +static volatile sig_atomic_t done; +static volatile unsigned count; + +/* We want to check this symbol in perf report */ +noinline void test_loop(void); + +static void sighandler(int sig __maybe_unused) +{ + done = 1; +} + +noinline void test_loop(void) +{ + while (!done) + count++; +} + +static void *thfunc(void *arg) +{ + void (*loop_fn)(void) = arg; + + loop_fn(); + return NULL; +} + +static int thloop(int argc, const char **argv) +{ + int sec = 1; + pthread_t th; + + if (argc > 0) + sec = atoi(argv[0]); + + signal(SIGINT, sighandler); + signal(SIGALRM, sighandler); + alarm(sec); + + pthread_create(&th, NULL, thfunc, test_loop); + test_loop(); + pthread_join(th, NULL); + + return 0; +} + +DEFINE_WORKLOAD(thloop); -- cgit v1.2.3 From 0b8ff0ba2744f364e8f5fb695ae323bae0ecfd19 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:46 -0800 Subject: perf test: Replace record test workload with thloop So that it can get rid of requirements for a compiler. $ sudo ./perf test -v 92 92: perf record tests : --- start --- test child forked, pid 740204 Basic --per-thread mode test Basic --per-thread mode test [Success] Register capture test Register capture test [Success] Basic --system-wide mode test Basic --system-wide mode test [Success] Basic target workload test Basic target workload test [Success] test child finished with 0 ---- end ---- perf record tests: Ok Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 59 ++-------------------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index e93b3a8871fe..4dff89e3a3fd 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -9,17 +9,13 @@ shelldir=$(dirname "$0") err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) +testprog="perf test -w thloop" testsym="test_loop" cleanup() { rm -rf "${perfdata}" rm -rf "${perfdata}".old - if [ "${testprog}" != "true" ]; then - rm -f "${testprog}" - fi - trap - EXIT TERM INT } @@ -29,53 +25,6 @@ trap_cleanup() { } trap trap_cleanup EXIT TERM INT -build_test_program() { - if ! [ -x "$(command -v cc)" ]; then - # No CC found. Fall back to 'true' - testprog=true - testsym=true - return - fi - - echo "Build a test program" - cat < -#include -#include - -void test_loop(void) { - volatile int count = 1000000; - - while (count--) - continue; -} - -void *thfunc(void *arg) { - int forever = *(int *)arg; - - do { - test_loop(); - } while (forever); - - return NULL; -} - -int main(int argc, char *argv[]) { - pthread_t th; - int forever = 0; - - if (argc > 1) - forever = atoi(argv[1]); - - pthread_create(&th, NULL, thfunc, &forever); - test_loop(); - pthread_join(th, NULL); - - return 0; -} -EOF -} - test_per_thread() { echo "Basic --per-thread mode test" if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null @@ -96,8 +45,8 @@ test_per_thread() { return fi - # run the test program in background (forever) - ${testprog} 1 & + # run the test program in background (for 30 seconds) + ${testprog} 30 & TESTPID=$! rm -f "${perfdata}" @@ -205,8 +154,6 @@ test_workload() { echo "Basic target workload test [Success]" } -build_test_program - test_per_thread test_register_capture test_system_wide -- cgit v1.2.3 From 41522f7442905814c654dbe2ca7b8d3605c7e0cc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:47 -0800 Subject: perf test: Add 'leafloop' test workload The leafloop workload is to run an infinite loop in the test_leaf function. This is needed for the ARM fp callgraph test to verify if it gets the correct callchains. $ perf test -w leafloop Committer notes: Add a: -U_FORTIFY_SOURCE to the leafloop CFLAGS as the main perf flags set it and it requires building with optimization, and this new test has a -O0. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 3 +++ tools/perf/tests/workloads/leafloop.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+) create mode 100644 tools/perf/tests/workloads/leafloop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 161f38476e77..0ed5ac452f6e 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -121,6 +121,7 @@ static struct test_suite **tests[] = { static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, + &workload__leafloop, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e6edfeeadaeb..86804dd6452b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -202,5 +202,6 @@ struct test_workload workload__##work = { \ /* The list of test workloads */ DECLARE_WORKLOAD(noploop); DECLARE_WORKLOAD(thloop); +DECLARE_WORKLOAD(leafloop); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index b8964b1099c0..03dc675a4a7c 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -2,3 +2,6 @@ perf-y += noploop.o perf-y += thloop.o +perf-y += leafloop.o + +CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/leafloop.c b/tools/perf/tests/workloads/leafloop.c new file mode 100644 index 000000000000..1bf5cc97649b --- /dev/null +++ b/tools/perf/tests/workloads/leafloop.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include "../tests.h" + +/* We want to check these symbols in perf script */ +noinline void leaf(volatile int b); +noinline void parent(volatile int b); + +static volatile int a; + +noinline void leaf(volatile int b) +{ + for (;;) + a += b; +} + +noinline void parent(volatile int b) +{ + leaf(b); +} + +static int leafloop(int argc, const char **argv) +{ + int c = 1; + + if (argc > 0) + c = atoi(argv[0]); + + parent(c); + return 0; +} + +DEFINE_WORKLOAD(leafloop); -- cgit v1.2.3 From 7cf0b4a73a4a4f36bb4ef53d066b811b7621c635 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:48 -0800 Subject: perf test: Replace arm callgraph fp test workload with leafloop So that it can get rid of requirement of a compiler. Reviewed-by: Leo Yan Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_callgraph_fp.sh | 34 +++---------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index ec108d45d3c6..e61d8deaa0c4 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -4,44 +4,16 @@ lscpu | grep -q "aarch64" || exit 2 -if ! [ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -TEST_PROGRAM_SOURCE=$(mktemp /tmp/test_program.XXXXX.c) -TEST_PROGRAM=$(mktemp /tmp/test_program.XXXXX) +TEST_PROGRAM="perf test -w leafloop" cleanup_files() { rm -f $PERF_DATA - rm -f $TEST_PROGRAM_SOURCE - rm -f $TEST_PROGRAM } trap cleanup_files exit term int -cat << EOF > $TEST_PROGRAM_SOURCE -int a = 0; -void leaf(void) { - for (;;) - a += a; -} -void parent(void) { - leaf(); -} -int main(void) { - parent(); - return 0; -} -EOF - -echo " + Compiling test program ($TEST_PROGRAM)..." - -CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer" -cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1 - # Add a 1 second delay to skip samples that are not in the leaf() function perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null & PID=$! @@ -58,11 +30,11 @@ wait $PID # program # 728 leaf # 753 parent -# 76c main +# 76c leafloop # ... perf script -i $PERF_DATA -F comm,ip,sym | head -n4 perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \ awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" || sym[1] != "parent" || - sym[2] != "main") exit 1 }' + sym[2] != "leafloop") exit 1 }' -- cgit v1.2.3 From 39281709a6e2301ac4c6ac7015c7793392ca2dfe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:49 -0800 Subject: perf test: Add 'sqrtloop' test workload The sqrtloop creates a child process to run an infinite loop calling sqrt() with rand(). This is needed for ARM SPE fork test. $ perf test -w sqrtloop It can take an optional argument to specify how long it will run in seconds (default: 1). Committer notes: Explicitely ignored the sqrt() return to fix the build on systems where the compiler complains it isn't being used. And added a sqrtloop specific CFLAGS to disable optimizations to make this a bit more robust wrt dead code elimination. Doing that a -U_FORTIFY_SOURCE needs to be added, as -O0 is incompatible with it. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/sqrtloop.c | 45 +++++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 tools/perf/tests/workloads/sqrtloop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 0ed5ac452f6e..9acb7a93eeb9 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -122,6 +122,7 @@ static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, &workload__leafloop, + &workload__sqrtloop, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 86804dd6452b..18c40319e67c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -203,5 +203,6 @@ struct test_workload workload__##work = { \ DECLARE_WORKLOAD(noploop); DECLARE_WORKLOAD(thloop); DECLARE_WORKLOAD(leafloop); +DECLARE_WORKLOAD(sqrtloop); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index 03dc675a4a7c..2312a338f01c 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -3,5 +3,7 @@ perf-y += noploop.o perf-y += thloop.o perf-y += leafloop.o +perf-y += sqrtloop.o +CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/sqrtloop.c b/tools/perf/tests/workloads/sqrtloop.c new file mode 100644 index 000000000000..ccc94c6a6676 --- /dev/null +++ b/tools/perf/tests/workloads/sqrtloop.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include +#include "../tests.h" + +static volatile sig_atomic_t done; + +static void sighandler(int sig __maybe_unused) +{ + done = 1; +} + +static int __sqrtloop(int sec) +{ + signal(SIGALRM, sighandler); + alarm(sec); + + while (!done) + (void)sqrt(rand()); + return 0; +} + +static int sqrtloop(int argc, const char **argv) +{ + int sec = 1; + + if (argc > 0) + sec = atoi(argv[0]); + + switch (fork()) { + case 0: + return __sqrtloop(sec); + case -1: + return -1; + default: + wait(NULL); + } + return 0; +} + +DEFINE_WORKLOAD(sqrtloop); -- cgit v1.2.3 From e011979ec4c3482b68be05e2fdf98fefce4cd75b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:50 -0800 Subject: perf test: Replace arm spe fork test workload with sqrtloop So that it can get rid of requirement of a compiler. I've also removed killall as it'll kill perf process now and run the test workload for 10 sec instead. Signed-off-by: Namhyung Kim Tested-by: James Clark Tested-by: Leo Yan Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_spe_fork.sh | 44 +---------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index c920d3583d30..da810e1b2b9e 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -11,14 +11,7 @@ skip_if_no_arm_spe_event() { skip_if_no_arm_spe_event || exit 2 -# skip if there's no compiler -if ! [ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - -TEST_PROGRAM_SOURCE=$(mktemp /tmp/__perf_test.program.XXXXX.c) -TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX) +TEST_PROGRAM="perf test -w sqrtloop 10" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) PERF_RECORD_LOG=$(mktemp /tmp/__perf_test.log.XXXXX) @@ -27,43 +20,10 @@ cleanup_files() echo "Cleaning up files..." rm -f ${PERF_RECORD_LOG} rm -f ${PERF_DATA} - rm -f ${TEST_PROGRAM_SOURCE} - rm -f ${TEST_PROGRAM} } trap cleanup_files exit term int -# compile test program -cat << EOF > $TEST_PROGRAM_SOURCE -#include -#include -#include -#include -#include - -int workload() { - while (1) - sqrt(rand()); - return 0; -} - -int main() { - switch (fork()) { - case 0: - return workload(); - case -1: - return 1; - default: - wait(NULL); - } - return 0; -} -EOF - -echo "Compiling test program..." -CFLAGS="-lm" -cc $TEST_PROGRAM_SOURCE $CFLAGS -o $TEST_PROGRAM || exit 1 - echo "Recording workload..." perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 & PERFPID=$! @@ -78,8 +38,6 @@ echo Log lines after 1 second = $log1 kill $PERFPID wait $PERFPID -# test program may leave an orphan process running the workload -killall $(basename $TEST_PROGRAM) if [ "$log0" = "$log1" ]; then -- cgit v1.2.3 From a104f0ea99d846df19aad8a5476eb9bc39fa42ca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:51 -0800 Subject: perf test: Add 'brstack' test workload The brstack is to run different kinds of branches repeatedly. This is necessary for brstack test case to verify if it has correct branch info. $ perf test -w brstack I renamed the internal functions to have brstack_ prefix as it's too generic name. Add a -U_FORTIFY_SOURCE to the brstack CFLAGS, as the main perf flags set it and it requires building with optimization, and this new test has a -O0. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/brstack.c | 40 ++++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 tools/perf/tests/workloads/brstack.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 9acb7a93eeb9..69fa56939309 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -123,6 +123,7 @@ static struct test_workload *workloads[] = { &workload__thloop, &workload__leafloop, &workload__sqrtloop, + &workload__brstack, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 18c40319e67c..dc96f59cac2e 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -204,5 +204,6 @@ DECLARE_WORKLOAD(noploop); DECLARE_WORKLOAD(thloop); DECLARE_WORKLOAD(leafloop); DECLARE_WORKLOAD(sqrtloop); +DECLARE_WORKLOAD(brstack); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index 2312a338f01c..ae06a5538b17 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -4,6 +4,8 @@ perf-y += noploop.o perf-y += thloop.o perf-y += leafloop.o perf-y += sqrtloop.o +perf-y += brstack.o CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE +CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/brstack.c b/tools/perf/tests/workloads/brstack.c new file mode 100644 index 000000000000..0b60bd37b9d1 --- /dev/null +++ b/tools/perf/tests/workloads/brstack.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include "../tests.h" + +#define BENCH_RUNS 999999 + +static volatile int cnt; + +static void brstack_bar(void) { +} /* return */ + +static void brstack_foo(void) { + brstack_bar(); /* call */ +} /* return */ + +static void brstack_bench(void) { + void (*brstack_foo_ind)(void) = brstack_foo; + + if ((cnt++) % 3) /* branch (cond) */ + brstack_foo(); /* call */ + brstack_bar(); /* call */ + brstack_foo_ind(); /* call (ind) */ +} + +static int brstack(int argc, const char **argv) +{ + int num_loops = BENCH_RUNS; + + if (argc > 0) + num_loops = atoi(argv[0]); + + while (1) { + if ((cnt++) > num_loops) + break; + brstack_bench();/* call */ + } /* branch (uncond) */ + return 0; +} + +DEFINE_WORKLOAD(brstack); -- cgit v1.2.3 From 7bc1dd96cf48e1b44773698e7c97481f5f455f6c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:52 -0800 Subject: perf test: Replace brstack test workload So that it can get rid of requirement of a compiler. Also rename the symbols to match with the perf test workload. Signed-off-by: Namhyung Kim Tested-by: James Clark Acked-by: German Gomez Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_brstack.sh | 66 +++++++--------------------------- 1 file changed, 12 insertions(+), 54 deletions(-) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index d7ff5c4b4da4..5856639b565b 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -4,13 +4,6 @@ # SPDX-License-Identifier: GPL-2.0 # German Gomez , 2022 -# we need a C compiler to build the test programs -# so bail if none is found -if ! [ -x "$(command -v cc)" ]; then - echo "failed: no compiler, install gcc" - exit 2 -fi - # skip the test if the hardware doesn't support branch stack sampling # and if the architecture doesn't support filter types: any,save_type,u if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then @@ -19,6 +12,7 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev fi TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) +TESTPROG="perf test -w brstack" cleanup() { rm -rf $TMPDIR @@ -26,57 +20,24 @@ cleanup() { trap cleanup exit term int -gen_test_program() { - # generate test program - cat << EOF > $1 -#define BENCH_RUNS 999999 -int cnt; -void bar(void) { -} /* return */ -void foo(void) { - bar(); /* call */ -} /* return */ -void bench(void) { - void (*foo_ind)(void) = foo; - if ((cnt++) % 3) /* branch (cond) */ - foo(); /* call */ - bar(); /* call */ - foo_ind(); /* call (ind) */ -} -int main(void) -{ - int cnt = 0; - while (1) { - if ((cnt++) > BENCH_RUNS) - break; - bench(); /* call */ - } /* branch (uncond) */ - return 0; -} -EOF -} - test_user_branches() { echo "Testing user branch stack sampling" - gen_test_program "$TEMPDIR/program.c" - cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out - - perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1 + perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1 perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script # example of branch entries: - # foo+0x14/bar+0x40/P/-/-/0/CALL + # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL set -x - egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script - egrep -m1 "^foo\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script - egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/CALL$" $TMPDIR/perf.script - egrep -m1 "^bench\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script - egrep -m1 "^bar\+[^ ]*/foo\+[^ ]*/RET$" $TMPDIR/perf.script - egrep -m1 "^foo\+[^ ]*/bench\+[^ ]*/RET$" $TMPDIR/perf.script - egrep -m1 "^bench\+[^ ]*/bench\+[^ ]*/COND$" $TMPDIR/perf.script - egrep -m1 "^main\+[^ ]*/main\+[^ ]*/UNCOND$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script + egrep -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script + egrep -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script + egrep -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script set +x # some branch types are still not being tested: @@ -91,10 +52,7 @@ test_filter() { echo "Testing branch stack filtering permutation ($filter,$expect)" - gen_test_program "$TEMPDIR/program.c" - cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out - - perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1 + perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script # fail if we find any branch type that doesn't match any of the expected ones -- cgit v1.2.3 From 3dfc01fe9d12a1e832f49deab37279faa8a9ebc8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:53 -0800 Subject: perf test: Add 'datasym' test workload The datasym workload is to check if perf mem command gets the data addresses precisely. This is needed for data symbol test. $ perf test -w datasym I had to keep the buf1 in the data section, otherwise it could end up in the BSS and was mmaped as a separate //anon region, then it was not symbolized at all. It needs to be fixed separately. Committer notes: Add a -U _FORTIFY_SOURCE to the datasym CFLAGS, as the main perf flags set it and it requires building with optimization, and this new test has a -O0. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/datasym.c | 24 ++++++++++++++++++++++++ 4 files changed, 28 insertions(+) create mode 100644 tools/perf/tests/workloads/datasym.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 69fa56939309..4c6ae59a4dfd 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -124,6 +124,7 @@ static struct test_workload *workloads[] = { &workload__leafloop, &workload__sqrtloop, &workload__brstack, + &workload__datasym, }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index dc96f59cac2e..e15f24cfc909 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -205,5 +205,6 @@ DECLARE_WORKLOAD(thloop); DECLARE_WORKLOAD(leafloop); DECLARE_WORKLOAD(sqrtloop); DECLARE_WORKLOAD(brstack); +DECLARE_WORKLOAD(datasym); #endif /* TESTS_H */ diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index ae06a5538b17..a1f34d5861e3 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -5,7 +5,9 @@ perf-y += thloop.o perf-y += leafloop.o perf-y += sqrtloop.o perf-y += brstack.o +perf-y += datasym.o CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE +CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c new file mode 100644 index 000000000000..ddd40bc63448 --- /dev/null +++ b/tools/perf/tests/workloads/datasym.c @@ -0,0 +1,24 @@ +#include +#include "../tests.h" + +typedef struct _buf { + char data1; + char reserved[55]; + char data2; +} buf __attribute__((aligned(64))); + +static buf buf1 = { + /* to have this in the data section */ + .reserved[0] = 1, +}; + +static int datasym(int argc __maybe_unused, const char **argv __maybe_unused) +{ + for (;;) { + buf1.data1++; + buf1.data2 += buf1.data1; + } + return 0; +} + +DEFINE_WORKLOAD(datasym); -- cgit v1.2.3 From 0b77fe474696aaaa592d52c7316c135401337aec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 16 Nov 2022 15:38:54 -0800 Subject: perf test: Replace data symbol test workload with datasym So that it can get rid of requirement of a compiler. $ sudo ./perf test -v 109 109: Test data symbol : --- start --- test child forked, pid 844526 Recording workload... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.354 MB /tmp/__perf_test.perf.data.GFeZO (4847 samples) ] Cleaning up files... test child finished with 0 ---- end ---- Test data symbol: Ok Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Adrian Hunter Cc: Athira Jajeev Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Zhengjun Xing Link: https://lore.kernel.org/r/20221116233854.1596378-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_data_symbol.sh | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index cd6eb54d235d..d871e6c743ef 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -11,13 +11,7 @@ skip_if_no_mem_event() { skip_if_no_mem_event || exit 2 -# skip if there's no compiler -if ! [ -x "$(command -v cc)" ]; then - echo "skip: no compiler, install gcc" - exit 2 -fi - -TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX) +TEST_PROGRAM="perf test -w datasym" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) check_result() { @@ -45,31 +39,10 @@ cleanup_files() { echo "Cleaning up files..." rm -f ${PERF_DATA} - rm -f ${TEST_PROGRAM} } trap cleanup_files exit term int -# compile test program -echo "Compiling test program..." -cat << EOF | cc -o ${TEST_PROGRAM} -x c - -typedef struct _buf { - char data1; - char reserved[55]; - char data2; -} buf __attribute__((aligned(64))); - -static buf buf1; - -int main(void) { - for (;;) { - buf1.data1++; - buf1.data2 += buf1.data1; - } - return 0; -} -EOF - echo "Recording workload..." # perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support -- cgit v1.2.3 From a3720e969c6de39210809ca9aaebec81919d6c6c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Nov 2022 14:46:31 -0800 Subject: perf build: Fix LIBTRACEEVENT_DYNAMIC The tools/lib includes fixes break LIBTRACEVENT_DYNAMIC as the makefile erroneously had dependencies on building libtraceevent even when not linking with it. This change fixes the issues with LIBTRACEEVENT_DYNAMIC by making the built files optional. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Jiri Olsa Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221116224631.207631-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 25 ++++++++++++++++++++++--- tools/perf/util/setup.py | 3 ++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8c0df762fb02..a17a6ea85e81 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -242,8 +242,10 @@ sub-make: fixdep else # force_fixdep LIBAPI_DIR = $(srctree)/tools/lib/api/ +ifndef LIBTRACEEVENT_DYNAMIC LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins +endif LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ @@ -293,6 +295,7 @@ SCRIPT_SH += perf-iostat.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) +ifndef LIBTRACEEVENT_DYNAMIC ifneq ($(OUTPUT),) LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent else @@ -306,13 +309,16 @@ LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include - # # The static build has no dynsym table, so this does not work for # static build. Looks like linker starts to scream about that now # (in Fedora 26) so we need to switch it off for static build. DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) +else +LIBTRACEEVENT_DYNAMIC_LIST = +LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = +endif ifneq ($(OUTPUT),) LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi @@ -375,7 +381,11 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) +ifndef LIBTRACEEVENT_DYNAMIC PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) +else +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI) +endif SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) @@ -785,9 +795,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(LIBPERF) \ $(LIBSUBCMD) \ $(LIBSYMBOL) \ - $(LIBTRACEEVENT) \ bpf-skel +ifndef LIBTRACEEVENT_DYNAMIC +prepare: $(LIBTRACEEVENT) +endif + $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -843,6 +856,7 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) +ifndef LIBTRACEEVENT_DYNAMIC LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' $(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) @@ -872,6 +886,7 @@ install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ $(LIBTRACEEVENT_FLAGS) install +endif $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ @@ -1152,7 +1167,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean +clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected @@ -1192,6 +1207,10 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, Documentation) \ $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null +ifndef LIBTRACEEVENT_DYNAMIC +clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean +endif + # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) # file if defined, with no further action. diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 5b1e6468d5e8..43e7ca40b2ec 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -77,7 +77,8 @@ perf = Extension('perf', include_dirs = ['util/include'], libraries = extra_libraries, extra_compile_args = cflags, - extra_objects = [libtraceevent, libapikfs, libperf], + extra_objects = [ x for x in [libtraceevent, libapikfs, libperf] + if x is not None], ) setup(name='perf', -- cgit v1.2.3 From e5c6109f4813246aa21e2c441e3cde549efa1f18 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 14 Nov 2022 13:07:22 -0800 Subject: perf list: Reorganize to use callbacks to allow honouring command line options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than controlling the list output with passed flags, add callbacks that are called when an event or metric are encountered. State is passed to the callback so that command line options can be respected, alternatively the callbacks can be changed. Fix a few bugs: - wordwrap to columns metric descriptions and expressions; - remove unnecessary whitespace after PMU event names; - the metric filter is a glob but matched using strstr which will always fail, switch to using a proper globmatch, - the detail flag gives details for extra kernel PMU events like branch-instructions. In metricgroup.c switch from struct mep being a rbtree of metricgroups containing a list of metrics, to the tree directly containing all the metrics. In general the alias for a name is passed to the print routine rather than being contained in the name with OR. Committer notes: Check the asprint() return to address this on fedora 36: util/print-events.c: In function ‘print_sdt_events’: util/print-events.c:183:33: error: ignoring return value of ‘asprintf’ declared with attribute ‘warn_unused_result’ [-Werror=unused-result] 183 | asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors $ gcc --version | head -1 gcc (GCC) 12.2.1 20220819 (Red Hat 12.2.1-2) $ Fix ps.pmu_glob setting when dealing with *:* events, it was being left with a freed pointer that then at the end of cmd_list() would be double freed. Check if pmu_name is NULL in default_print_event() before calling strglobmatch(pmu_name, ...) to avoid a segfault. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 333 +++++++++++++++++++++++++++++++------ tools/perf/util/metricgroup.c | 243 ++++++++------------------- tools/perf/util/metricgroup.h | 4 +- tools/perf/util/pmu.c | 145 ++++++---------- tools/perf/util/pmu.h | 5 +- tools/perf/util/print-events.c | 364 ++++++++++++++++++++++------------------- tools/perf/util/print-events.h | 42 +++-- 7 files changed, 624 insertions(+), 512 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index cc84ced6da26..0c84fdb3ad37 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -15,31 +15,240 @@ #include "util/pmu-hybrid.h" #include "util/debug.h" #include "util/metricgroup.h" +#include "util/string2.h" +#include "util/strlist.h" #include #include #include -static bool desc_flag = true; -static bool details_flag; +/** + * struct print_state - State and configuration passed to the default_print + * functions. + */ +struct print_state { + /** + * @pmu_glob: Optionally restrict PMU and metric matching to PMU or + * debugfs subsystem name. + */ + char *pmu_glob; + /** @event_glob: Optional pattern matching glob. */ + char *event_glob; + /** @name_only: Print event or metric names only. */ + bool name_only; + /** @desc: Print the event or metric description. */ + bool desc; + /** @long_desc: Print longer event or metric description. */ + bool long_desc; + /** @deprecated: Print deprecated events or metrics. */ + bool deprecated; + /** + * @detailed: Print extra information on the perf event such as names + * and expressions used internally by events. + */ + bool detailed; + /** @metrics: Controls printing of metric and metric groups. */ + bool metrics; + /** @metricgroups: Controls printing of metric and metric groups. */ + bool metricgroups; + /** @last_topic: The last printed event topic. */ + char *last_topic; + /** @last_metricgroups: The last printed metric group. */ + char *last_metricgroups; + /** @visited_metrics: Metrics that are printed to avoid duplicates. */ + struct strlist *visited_metrics; +}; + +static void default_print_start(void *ps) +{ + struct print_state *print_state = ps; + + if (!print_state->name_only && pager_in_use()) + printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); +} + +static void default_print_end(void *print_state __maybe_unused) {} + +static void wordwrap(const char *s, int start, int max, int corr) +{ + int column = start; + int n; + + while (*s) { + int wlen = strcspn(s, " \t"); + + if (column + wlen >= max && column > start) { + printf("\n%*s", start, ""); + column = start + corr; + } + n = printf("%s%.*s", column > start ? " " : "", wlen, s); + if (n <= 0) + break; + s += wlen; + column += n; + s = skip_spaces(s); + } +} + +static void default_print_event(void *ps, const char *pmu_name, const char *topic, + const char *event_name, const char *event_alias, + const char *scale_unit __maybe_unused, + bool deprecated, const char *event_type_desc, + const char *desc, const char *long_desc, + const char *encoding_desc, + const char *metric_name, const char *metric_expr) +{ + struct print_state *print_state = ps; + int pos; + + if (deprecated && !print_state->deprecated) + return; + + if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob)) + return; + + if (print_state->event_glob && + (!event_name || !strglobmatch(event_name, print_state->event_glob)) && + (!event_alias || !strglobmatch(event_alias, print_state->event_glob)) && + (!topic || !strglobmatch_nocase(topic, print_state->event_glob))) + return; + + if (print_state->name_only) { + if (event_alias && strlen(event_alias)) + printf("%s ", event_alias); + else + printf("%s ", event_name); + return; + } + + if (strcmp(print_state->last_topic, topic ?: "")) { + if (topic) + printf("\n%s:\n", topic); + free(print_state->last_topic); + print_state->last_topic = strdup(topic ?: ""); + } + + if (event_alias && strlen(event_alias)) + pos = printf(" %s OR %s", event_name, event_alias); + else + pos = printf(" %s", event_name); + + if (!topic && event_type_desc) { + for (; pos < 53; pos++) + putchar(' '); + printf("[%s]\n", event_type_desc); + } else + putchar('\n'); + + if (desc && print_state->desc) { + printf("%*s", 8, "["); + wordwrap(desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + + if (long_desc && print_state->long_desc) { + printf("%*s", 8, "["); + wordwrap(long_desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + + if (print_state->detailed && encoding_desc) { + printf("%*s%s", 8, "", encoding_desc); + if (metric_name) + printf(" MetricName: %s", metric_name); + if (metric_expr) + printf(" MetricExpr: %s", metric_expr); + putchar('\n'); + } +} + +static void default_print_metric(void *ps, + const char *group, + const char *name, + const char *desc, + const char *long_desc, + const char *expr, + const char *unit __maybe_unused) +{ + struct print_state *print_state = ps; + + if (print_state->event_glob && + (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) && + (!print_state->metricgroups || !group || !strglobmatch(group, print_state->event_glob))) + return; + + if (!print_state->name_only && !print_state->last_metricgroups) { + if (print_state->metricgroups) { + printf("\nMetric Groups:\n"); + if (!print_state->metrics) + putchar('\n'); + } else { + printf("\nMetrics:\n\n"); + } + } + if (!print_state->last_metricgroups || + strcmp(print_state->last_metricgroups, group ?: "")) { + if (group && print_state->metricgroups) { + if (print_state->name_only) + printf("%s ", group); + else if (print_state->metrics) + printf("\n%s:\n", group); + else + printf("%s\n", group); + } + free(print_state->last_metricgroups); + print_state->last_metricgroups = strdup(group ?: ""); + } + if (!print_state->metrics) + return; + + if (print_state->name_only) { + if (print_state->metrics && + !strlist__has_entry(print_state->visited_metrics, name)) { + printf("%s ", name); + strlist__add(print_state->visited_metrics, name); + } + return; + } + printf(" %s\n", name); + + if (desc && print_state->desc) { + printf("%*s", 8, "["); + wordwrap(desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + if (long_desc && print_state->long_desc) { + printf("%*s", 8, "["); + wordwrap(long_desc, 8, pager_get_columns(), 0); + printf("]\n"); + } + if (expr && print_state->detailed) { + printf("%*s", 8, "["); + wordwrap(expr, 8, pager_get_columns(), 0); + printf("]\n"); + } +} int cmd_list(int argc, const char **argv) { int i, ret = 0; - bool raw_dump = false; - bool long_desc_flag = false; - bool deprecated = false; - char *pmu_name = NULL; + struct print_state ps = {}; + struct print_callbacks print_cb = { + .print_start = default_print_start, + .print_end = default_print_end, + .print_event = default_print_event, + .print_metric = default_print_metric, + }; const char *hybrid_name = NULL; const char *unit_name = NULL; struct option list_options[] = { - OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), - OPT_BOOLEAN('d', "desc", &desc_flag, + OPT_BOOLEAN(0, "raw-dump", &ps.name_only, "Dump raw events"), + OPT_BOOLEAN('d', "desc", &ps.desc, "Print extra event descriptions. --no-desc to not print."), - OPT_BOOLEAN('v', "long-desc", &long_desc_flag, + OPT_BOOLEAN('v', "long-desc", &ps.long_desc, "Print longer event descriptions."), - OPT_BOOLEAN(0, "details", &details_flag, + OPT_BOOLEAN(0, "details", &ps.detailed, "Print information on the perf event names and expressions used internally by events."), - OPT_BOOLEAN(0, "deprecated", &deprecated, + OPT_BOOLEAN(0, "deprecated", &ps.deprecated, "Print deprecated events."), OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."), @@ -63,20 +272,28 @@ int cmd_list(int argc, const char **argv) setup_pager(); - if (!raw_dump && pager_in_use()) - printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); + if (!ps.name_only) + setup_pager(); + ps.desc = !ps.long_desc; + ps.last_topic = strdup(""); + assert(ps.last_topic); + ps.visited_metrics = strlist__new(NULL, NULL); + assert(ps.visited_metrics); if (unit_name) - pmu_name = strdup(unit_name); + ps.pmu_glob = strdup(unit_name); else if (hybrid_name) { - pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name); - if (!pmu_name) + ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); + if (!ps.pmu_glob) pr_warning("WARNING: hybrid cputype is not supported!\n"); } + print_cb.print_start(&ps); + if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag, long_desc_flag, - details_flag, deprecated, pmu_name); + ps.metrics = true; + ps.metricgroups = true; + print_events(&print_cb, &ps); goto out; } @@ -84,31 +301,35 @@ int cmd_list(int argc, const char **argv) char *sep, *s; if (strcmp(argv[i], "tracepoint") == 0) - print_tracepoint_events(NULL, NULL, raw_dump); + print_tracepoint_events(&print_cb, &ps); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_symbol_events(NULL, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); + print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { - print_symbol_events(NULL, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); - print_tool_events(NULL, raw_dump); + print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + print_tool_events(&print_cb, &ps); } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, raw_dump); + print_hwcache_events(&print_cb, &ps); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, raw_dump, !desc_flag, - long_desc_flag, details_flag, - deprecated, pmu_name); + print_pmu_events(&print_cb, &ps); else if (strcmp(argv[i], "sdt") == 0) - print_sdt_events(NULL, NULL, raw_dump); - else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) - metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name); - else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) - metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name); - else if ((sep = strchr(argv[i], ':')) != NULL) { + print_sdt_events(&print_cb, &ps); + else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) { + ps.metricgroups = false; + ps.metrics = true; + metricgroup__print(&print_cb, &ps); + } else if (strcmp(argv[i], "metricgroup") == 0 || + strcmp(argv[i], "metricgroups") == 0) { + ps.metricgroups = true; + ps.metrics = false; + metricgroup__print(&print_cb, &ps); + } else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; + char *old_pmu_glob = ps.pmu_glob; sep_idx = sep - argv[i]; s = strdup(argv[i]); @@ -118,34 +339,42 @@ int cmd_list(int argc, const char **argv) } s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, raw_dump); - print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); + ps.pmu_glob = s; + ps.event_glob = s + sep_idx + 1; + print_tracepoint_events(&print_cb, &ps); + print_sdt_events(&print_cb, &ps); + ps.metrics = true; + ps.metricgroups = true; + metricgroup__print(&print_cb, &ps); free(s); + ps.pmu_glob = old_pmu_glob; } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { printf("Critical: Not enough memory! Trying to continue...\n"); continue; } - print_symbol_events(s, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); - print_symbol_events(s, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); - print_tool_events(s, raw_dump); - print_hwcache_events(s, raw_dump); - print_pmu_events(s, raw_dump, !desc_flag, - long_desc_flag, - details_flag, - deprecated, - pmu_name); - print_tracepoint_events(NULL, s, raw_dump); - print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); + ps.event_glob = s; + print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); + print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + print_tool_events(&print_cb, &ps); + print_hwcache_events(&print_cb, &ps); + print_pmu_events(&print_cb, &ps); + print_tracepoint_events(&print_cb, &ps); + print_sdt_events(&print_cb, &ps); + ps.metrics = true; + ps.metricgroups = true; + metricgroup__print(&print_cb, &ps); free(s); } } out: - free(pmu_name); + print_cb.print_end(&ps); + free(ps.pmu_glob); + free(ps.last_topic); + free(ps.last_metricgroups); + strlist__delete(ps.visited_metrics); return ret; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index cf9e2452d322..6eac7a60ed27 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -12,6 +12,7 @@ #include "strbuf.h" #include "pmu.h" #include "pmu-hybrid.h" +#include "print-events.h" #include "expr.h" #include "rblist.h" #include @@ -353,51 +354,65 @@ static bool match_pe_metric(const struct pmu_event *pe, const char *metric) match_metric(pe->metric_name, metric); } +/** struct mep - RB-tree node for building printing information. */ struct mep { + /** nd - RB-tree element. */ struct rb_node nd; - const char *name; - struct strlist *metrics; + /** @metric_group: Owned metric group name, separated others with ';'. */ + char *metric_group; + const char *metric_name; + const char *metric_desc; + const char *metric_long_desc; + const char *metric_expr; + const char *metric_unit; }; static int mep_cmp(struct rb_node *rb_node, const void *entry) { struct mep *a = container_of(rb_node, struct mep, nd); struct mep *b = (struct mep *)entry; + int ret; - return strcmp(a->name, b->name); + ret = strcmp(a->metric_group, b->metric_group); + if (ret) + return ret; + + return strcmp(a->metric_name, b->metric_name); } -static struct rb_node *mep_new(struct rblist *rl __maybe_unused, - const void *entry) +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) { struct mep *me = malloc(sizeof(struct mep)); if (!me) return NULL; + memcpy(me, entry, sizeof(struct mep)); - me->name = strdup(me->name); - if (!me->name) - goto out_me; - me->metrics = strlist__new(NULL, NULL); - if (!me->metrics) - goto out_name; return &me->nd; -out_name: - zfree(&me->name); -out_me: +} + +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) +{ + struct mep *me = container_of(nd, struct mep, nd); + + zfree(&me->metric_group); free(me); - return NULL; } -static struct mep *mep_lookup(struct rblist *groups, const char *name) +static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, + const char *metric_name) { struct rb_node *nd; struct mep me = { - .name = name + .metric_group = strdup(metric_group), + .metric_name = metric_name, }; nd = rblist__find(groups, &me); - if (nd) + if (nd) { + free(me.metric_group); return container_of(nd, struct mep, nd); + } rblist__add_node(groups, &me); nd = rblist__find(groups, &me); if (nd) @@ -405,107 +420,37 @@ static struct mep *mep_lookup(struct rblist *groups, const char *name) return NULL; } -static void mep_delete(struct rblist *rl __maybe_unused, - struct rb_node *nd) -{ - struct mep *me = container_of(nd, struct mep, nd); - - strlist__delete(me->metrics); - zfree(&me->name); - free(me); -} - -static void metricgroup__print_strlist(struct strlist *metrics, bool raw) -{ - struct str_node *sn; - int n = 0; - - strlist__for_each_entry (sn, metrics) { - if (raw) - printf("%s%s", n > 0 ? " " : "", sn->s); - else - printf(" %s\n", sn->s); - n++; - } - if (raw) - putchar('\n'); -} - -static int metricgroup__print_pmu_event(const struct pmu_event *pe, - bool metricgroups, char *filter, - bool raw, bool details, - struct rblist *groups, - struct strlist *metriclist) +static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, + struct rblist *groups) { const char *g; char *omg, *mg; - g = pe->metric_group; - if (!g && pe->metric_name) { - if (pe->name) - return 0; - g = "No_group"; - } - - if (!g) - return 0; - - mg = strdup(g); - + mg = strdup(pe->metric_group ?: "No_group"); if (!mg) return -ENOMEM; omg = mg; while ((g = strsep(&mg, ";")) != NULL) { struct mep *me; - char *s; g = skip_spaces(g); - if (*g == 0) - g = "No_group"; - if (filter && !strstr(g, filter)) - continue; - if (raw) - s = (char *)pe->metric_name; - else { - if (asprintf(&s, "%s\n%*s%s]", - pe->metric_name, 8, "[", pe->desc) < 0) - return -1; - if (details) { - if (asprintf(&s, "%s\n%*s%s]", - s, 8, "[", pe->metric_expr) < 0) - return -1; - } - } - - if (!s) - continue; + if (strlen(g)) + me = mep_lookup(groups, g, pe->metric_name); + else + me = mep_lookup(groups, "No_group", pe->metric_name); - if (!metricgroups) { - strlist__add(metriclist, s); - } else { - me = mep_lookup(groups, g); - if (!me) - continue; - strlist__add(me->metrics, s); + if (me) { + me->metric_desc = pe->desc; + me->metric_long_desc = pe->long_desc; + me->metric_expr = pe->metric_expr; + me->metric_unit = pe->unit; } - - if (!raw) - free(s); } free(omg); return 0; } -struct metricgroup_print_sys_idata { - struct strlist *metriclist; - char *filter; - struct rblist *groups; - bool metricgroups; - bool raw; - bool details; -}; - struct metricgroup_iter_data { pmu_event_iter_fn fn; void *data; @@ -528,61 +473,26 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe, return d->fn(pe, table, d->data); } - return 0; } -static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, - void *data) -{ - struct metricgroup_print_sys_idata *d = data; - - return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw, - d->details, d->groups, d->metriclist); -} - -struct metricgroup_print_data { - const char *pmu_name; - struct strlist *metriclist; - char *filter; - struct rblist *groups; - bool metricgroups; - bool raw; - bool details; -}; - -static int metricgroup__print_callback(const struct pmu_event *pe, - const struct pmu_events_table *table __maybe_unused, - void *vdata) +static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe, + const struct pmu_events_table *table __maybe_unused, + void *vdata) { - struct metricgroup_print_data *data = vdata; - const char *pmu = pe->pmu ?: "cpu"; + struct rblist *groups = vdata; - if (!pe->metric_expr) - return 0; - - if (data->pmu_name && strcmp(data->pmu_name, pmu)) + if (!pe->metric_name) return 0; - return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter, - data->raw, data->details, data->groups, - data->metriclist); + return metricgroup__add_to_mep_groups(pe, groups); } -void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw, bool details, const char *pmu_name) +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) { struct rblist groups; - struct rb_node *node, *next; - struct strlist *metriclist = NULL; const struct pmu_events_table *table; - - if (!metricgroups) { - metriclist = strlist__new(NULL, NULL); - if (!metriclist) - return; - } + struct rb_node *node, *next; rblist__init(&groups); groups.node_new = mep_new; @@ -590,56 +500,31 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, groups.node_delete = mep_delete; table = pmu_events_table__find(); if (table) { - struct metricgroup_print_data data = { - .pmu_name = pmu_name, - .metriclist = metriclist, - .metricgroups = metricgroups, - .filter = filter, - .raw = raw, - .details = details, - .groups = &groups, - }; - pmu_events_table_for_each_event(table, - metricgroup__print_callback, - &data); + metricgroup__add_to_mep_groups_callback, + &groups); } { struct metricgroup_iter_data data = { - .fn = metricgroup__print_sys_event_iter, - .data = (void *) &(struct metricgroup_print_sys_idata){ - .metriclist = metriclist, - .metricgroups = metricgroups, - .filter = filter, - .raw = raw, - .details = details, - .groups = &groups, - }, + .fn = metricgroup__add_to_mep_groups_callback, + .data = &groups, }; - pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); } - if (!filter || !rblist__empty(&groups)) { - if (metricgroups && !raw) - printf("\nMetric Groups:\n\n"); - else if (metrics && !raw) - printf("\nMetrics:\n\n"); - } - for (node = rb_first_cached(&groups.entries); node; node = next) { struct mep *me = container_of(node, struct mep, nd); - if (metricgroups) - printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n"); - if (metrics) - metricgroup__print_strlist(me->metrics, raw); + print_cb->print_metric(print_state, + me->metric_group, + me->metric_name, + me->metric_desc, + me->metric_long_desc, + me->metric_expr, + me->metric_unit); next = rb_next(node); rblist__remove_node(&groups, node); } - if (!metricgroups) - metricgroup__print_strlist(metriclist, raw); - strlist__delete(metriclist); } static const char *code_characters = ",-=@"; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 732d3a0d3334..0013cf582173 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -10,6 +10,7 @@ struct evlist; struct evsel; struct option; +struct print_callbacks; struct rblist; struct cgroup; @@ -78,8 +79,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metric_no_merge, struct rblist *metric_events); -void metricgroup__print(bool metrics, bool groups, char *filter, - bool raw, bool details, const char *pmu_name); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool metricgroup__has_metric(const char *metric); int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 075c82dd1347..e9a4f31926bf 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -23,6 +23,7 @@ #include "evsel.h" #include "pmu.h" #include "parse-events.h" +#include "print-events.h" #include "header.h" #include "string2.h" #include "strbuf.h" @@ -1579,13 +1580,6 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, return buf; } -static char *format_alias_or(char *buf, int len, const struct perf_pmu *pmu, - const struct perf_pmu_alias *alias) -{ - snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name); - return buf; -} - /** Struct for ordering events as output in perf list. */ struct sevent { /** PMU for event. */ @@ -1629,7 +1623,7 @@ static int cmp_sevent(const void *a, const void *b) /* Order CPU core events to be first */ if (as->is_cpu != bs->is_cpu) - return bs->is_cpu - as->is_cpu; + return as->is_cpu ? -1 : 1; /* Order by PMU name. */ a_pmu_name = as->pmu->name ?: ""; @@ -1642,27 +1636,6 @@ static int cmp_sevent(const void *a, const void *b) return strcmp(a_name, b_name); } -static void wordwrap(char *s, int start, int max, int corr) -{ - int column = start; - int n; - - while (*s) { - int wlen = strcspn(s, " \t"); - - if (column + wlen >= max && column > start) { - printf("\n%*s", start, ""); - column = start + corr; - } - n = printf("%s%.*s", column > start ? " " : "", wlen, s); - if (n <= 0) - break; - s += wlen; - column += n; - s = skip_spaces(s); - } -} - bool is_pmu_core(const char *name) { return !strcmp(name, "cpu") || is_arm_pmu_core(name); @@ -1685,24 +1658,19 @@ static bool pmu_alias_is_duplicate(struct sevent *alias_a, return strcmp(a_pmu_name, b_pmu_name) == 0; } -void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name) +void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) { struct perf_pmu *pmu; - struct perf_pmu_alias *alias; + struct perf_pmu_alias *event; char buf[1024]; int printed = 0; int len, j; struct sevent *aliases; - int numdesc = 0; - int columns = pager_get_columns(); - char *topic = NULL; pmu = NULL; len = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(alias, &pmu->aliases, list) + list_for_each_entry(event, &pmu->aliases, list) len++; if (pmu->selectable) len++; @@ -1715,32 +1683,15 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - bool is_cpu; + bool is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); - if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) - continue; - - is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); - - list_for_each_entry(alias, &pmu->aliases, list) { - if (alias->deprecated && !deprecated) - continue; - - if (event_glob != NULL && - !(strglobmatch_nocase(alias->name, event_glob) || - (!is_cpu && - strglobmatch_nocase(alias->name, event_glob)) || - (alias->topic && - strglobmatch_nocase(alias->topic, event_glob)))) - continue; - - aliases[j].event = alias; + list_for_each_entry(event, &pmu->aliases, list) { + aliases[j].event = event; aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; j++; } - if (pmu->selectable && - (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { + if (pmu->selectable) { aliases[j].event = NULL; aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; @@ -1750,7 +1701,12 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, len = j; qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { - char *name, *desc; + const char *name, *alias = NULL, *scale_unit = NULL, + *desc = NULL, *long_desc = NULL, + *encoding_desc = NULL, *topic = NULL, + *metric_name = NULL, *metric_expr = NULL; + bool deprecated = false; + size_t buf_used; /* Skip duplicates */ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) @@ -1758,48 +1714,51 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, if (!aliases[j].event) { /* A selectable event. */ - snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name); + buf_used = snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name) + 1; name = buf; - } else if (aliases[j].event->desc) { - name = aliases[j].event->name; } else { - if (!name_only && aliases[j].is_cpu) { - name = format_alias_or(buf, sizeof(buf), aliases[j].pmu, - aliases[j].event); + if (aliases[j].event->desc) { + name = aliases[j].event->name; + buf_used = 0; } else { name = format_alias(buf, sizeof(buf), aliases[j].pmu, aliases[j].event); + if (aliases[j].is_cpu) { + alias = name; + name = aliases[j].event->name; + } + buf_used = strlen(buf) + 1; } - } - if (name_only) { - printf("%s ", name); - continue; - } - printed++; - if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) { - printf(" %-50s [Kernel PMU event]\n", name); - continue; - } - if (numdesc++ == 0) - printf("\n"); - if (aliases[j].event->topic && (!topic || - strcmp(topic, aliases[j].event->topic))) { - printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic); + if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) { + scale_unit = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%G%s", aliases[j].event->scale, + aliases[j].event->unit) + 1; + } + desc = aliases[j].event->desc; + long_desc = aliases[j].event->long_desc; topic = aliases[j].event->topic; + encoding_desc = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%s/%s/", aliases[j].pmu->name, + aliases[j].event->str) + 1; + metric_name = aliases[j].event->metric_name; + metric_expr = aliases[j].event->metric_expr; + deprecated = aliases[j].event->deprecated; } - printf(" %-50s\n", name); - printf("%*s", 8, "["); - desc = long_desc ? aliases[j].event->long_desc : aliases[j].event->desc; - wordwrap(desc, 8, columns, 0); - printf("]\n"); - if (details_flag) { - printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str); - if (aliases[j].event->metric_name) - printf(" MetricName: %s", aliases[j].event->metric_name); - if (aliases[j].event->metric_expr) - printf(" MetricExpr: %s", aliases[j].event->metric_expr); - putchar('\n'); - } + print_cb->print_event(print_state, + aliases[j].pmu->name, + topic, + name, + alias, + scale_unit, + deprecated, + "Kernel PMU event", + desc, + long_desc, + encoding_desc, + metric_name, + metric_expr); } if (printed && pager_in_use()) printf("\n"); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ee02e1ef9187..69ca0004f94f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -12,6 +12,7 @@ struct evsel_config_term; struct perf_cpu_map; +struct print_callbacks; enum { PERF_PMU_FORMAT_VALUE_CONFIG, @@ -225,9 +226,7 @@ void perf_pmu__del_formats(struct list_head *formats); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); -void print_pmu_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag, - bool deprecated, const char *pmu_name); +void print_pmu_events(const struct print_callbacks *print_cb, void *print_state); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index d53dba033597..2646ae18d9f9 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -28,6 +28,7 @@ #define MAX_NAME_LEN 100 +/** Strings corresponding to enum perf_type_id. */ static const char * const event_type_descriptors[] = { "Hardware event", "Software event", @@ -55,11 +56,9 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { /* * Print the events from /tracing/events */ -void print_tracepoint_events(const char *subsys_glob, - const char *event_glob, bool name_only) +void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state) { struct dirent **sys_namelist = NULL; - bool printed = false; int sys_items = tracing_events__scandir_alphasort(&sys_namelist); for (int i = 0; i < sys_items; i++) { @@ -73,10 +72,6 @@ void print_tracepoint_events(const char *subsys_glob, !strcmp(sys_dirent->d_name, "..")) continue; - if (subsys_glob != NULL && - !strglobmatch(sys_dirent->d_name, subsys_glob)) - continue; - dir_path = get_events_file(sys_dirent->d_name); if (!dir_path) continue; @@ -94,41 +89,41 @@ void print_tracepoint_events(const char *subsys_glob, if (tp_event_has_id(dir_path, evt_dirent) != 0) continue; - if (event_glob != NULL && - !strglobmatch(evt_dirent->d_name, event_glob)) - continue; - snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); - if (name_only) - printf("%s ", evt_path); - else { - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - } - printed = true; + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + evt_path, + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + "Tracepoint event", + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); } free(dir_path); free(evt_namelist); } free(sys_namelist); - if (printed && pager_in_use()) - printf("\n"); } -void print_sdt_events(const char *subsys_glob, const char *event_glob, - bool name_only) +void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) { - struct probe_cache *pcache; - struct probe_cache_entry *ent; struct strlist *bidlist, *sdtlist; - struct strlist_config cfg = {.dont_dupstr = true}; - struct str_node *nd, *nd2; - char *buf, *path, *ptr = NULL; - bool show_detail = false; - int ret; - - sdtlist = strlist__new(NULL, &cfg); + struct str_node *bid_nd, *sdt_name, *next_sdt_name; + const char *last_sdt_name = NULL; + + /* + * The implicitly sorted sdtlist will hold the tracepoint name followed + * by @. If the tracepoint name is unique (determined by + * looking at the adjacent nodes) the @ is dropped otherwise + * the executable path and buildid are added to the name. + */ + sdtlist = strlist__new(NULL, NULL); if (!sdtlist) { pr_debug("Failed to allocate new strlist for SDT\n"); return; @@ -138,65 +133,78 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, pr_debug("Failed to get buildids: %d\n", errno); return; } - strlist__for_each_entry(nd, bidlist) { - pcache = probe_cache__new(nd->s, NULL); + strlist__for_each_entry(bid_nd, bidlist) { + struct probe_cache *pcache; + struct probe_cache_entry *ent; + + pcache = probe_cache__new(bid_nd->s, NULL); if (!pcache) continue; list_for_each_entry(ent, &pcache->entries, node) { - if (!ent->sdt) - continue; - if (subsys_glob && - !strglobmatch(ent->pev.group, subsys_glob)) - continue; - if (event_glob && - !strglobmatch(ent->pev.event, event_glob)) - continue; - ret = asprintf(&buf, "%s:%s@%s", ent->pev.group, - ent->pev.event, nd->s); - if (ret > 0) - strlist__add(sdtlist, buf); + char buf[1024]; + + snprintf(buf, sizeof(buf), "%s:%s@%s", + ent->pev.group, ent->pev.event, bid_nd->s); + strlist__add(sdtlist, buf); } probe_cache__delete(pcache); } strlist__delete(bidlist); - strlist__for_each_entry(nd, sdtlist) { - buf = strchr(nd->s, '@'); - if (buf) - *(buf++) = '\0'; - if (name_only) { - printf("%s ", nd->s); - continue; - } - nd2 = strlist__next(nd); - if (nd2) { - ptr = strchr(nd2->s, '@'); - if (ptr) - *ptr = '\0'; - if (strcmp(nd->s, nd2->s) == 0) - show_detail = true; + strlist__for_each_entry(sdt_name, sdtlist) { + bool show_detail = false; + char *bid = strchr(sdt_name->s, '@'); + char *evt_name = NULL; + + if (bid) + *(bid++) = '\0'; + + if (last_sdt_name && !strcmp(last_sdt_name, sdt_name->s)) { + show_detail = true; + } else { + next_sdt_name = strlist__next(sdt_name); + if (next_sdt_name) { + char *bid2 = strchr(next_sdt_name->s, '@'); + + if (bid2) + *bid2 = '\0'; + if (strcmp(sdt_name->s, next_sdt_name->s) == 0) + show_detail = true; + if (bid2) + *bid2 = '@'; + } } + last_sdt_name = sdt_name->s; + if (show_detail) { - path = build_id_cache__origname(buf); - ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf); - if (ret > 0) { - printf(" %-50s [%s]\n", buf, "SDT event"); - free(buf); + char *path = build_id_cache__origname(bid); + + if (path) { + if (asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid) < 0) + evt_name = NULL; + free(path); } - free(path); - } else - printf(" %-50s [%s]\n", nd->s, "SDT event"); - if (nd2) { - if (strcmp(nd->s, nd2->s) != 0) - show_detail = false; - if (ptr) - *ptr = '@'; } + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + evt_name ?: sdt_name->s, + /*event_alias=*/NULL, + /*deprecated=*/false, + /*scale_unit=*/NULL, + "SDT event", + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + + free(evt_name); } strlist__delete(sdtlist); } -int print_hwcache_events(const char *event_glob, bool name_only) +int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) { struct strlist *evt_name_list = strlist__new(NULL, NULL); struct str_node *nd; @@ -216,9 +224,6 @@ int print_hwcache_events(const char *event_glob, bool name_only) char name[64]; __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - if (event_glob != NULL && !strglobmatch(name, event_glob)) - continue; - if (!perf_pmu__has_hybrid()) { if (is_event_supported(PERF_TYPE_HW_CACHE, type | (op << 8) | (i << 16))) @@ -240,55 +245,47 @@ int print_hwcache_events(const char *event_glob, bool name_only) } strlist__for_each_entry(nd, evt_name_list) { - if (name_only) { - printf("%s ", nd->s); - continue; - } - printf(" %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]); + print_cb->print_event(print_state, + "cache", + /*pmu_name=*/NULL, + nd->s, + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_HW_CACHE], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); } - if (!strlist__empty(evt_name_list) && pager_in_use()) - printf("\n"); - strlist__delete(evt_name_list); return 0; } -static void print_tool_event(const struct event_symbol *syms, const char *event_glob, - bool name_only) -{ - if (syms->symbol == NULL) - return; - - if (event_glob && !(strglobmatch(syms->symbol, event_glob) || - (syms->alias && strglobmatch(syms->alias, event_glob)))) - return; - - if (name_only) - printf("%s ", syms->symbol); - else { - char name[MAX_NAME_LEN]; - - if (syms->alias && strlen(syms->alias)) - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); - else - strlcpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, "Tool event"); - } -} - -void print_tool_events(const char *event_glob, bool name_only) +void print_tool_events(const struct print_callbacks *print_cb, void *print_state) { // Start at 1 because the first enum entry means no tool event. - for (int i = 1; i < PERF_TOOL_MAX; ++i) - print_tool_event(event_symbols_tool + i, event_glob, name_only); - - if (pager_in_use()) - printf("\n"); + for (int i = 1; i < PERF_TOOL_MAX; ++i) { + print_cb->print_event(print_state, + "tool", + /*pmu_name=*/NULL, + event_symbols_tool[i].symbol, + event_symbols_tool[i].alias, + /*scale_unit=*/NULL, + /*deprecated=*/false, + "Tool event", + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + } } -void print_symbol_events(const char *event_glob, unsigned int type, - struct event_symbol *syms, unsigned int max, - bool name_only) +void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, + unsigned int type, const struct event_symbol *syms, + unsigned int max) { struct strlist *evt_name_list = strlist__new(NULL, NULL); struct str_node *nd; @@ -305,10 +302,6 @@ void print_symbol_events(const char *event_glob, unsigned int type, if (syms[i].symbol == NULL) continue; - if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) || - (syms[i].alias && strglobmatch(syms[i].alias, event_glob)))) - continue; - if (!is_event_supported(type, i)) continue; @@ -322,63 +315,92 @@ void print_symbol_events(const char *event_glob, unsigned int type, } strlist__for_each_entry(nd, evt_name_list) { - if (name_only) { - printf("%s ", nd->s); - continue; + char *alias = strstr(nd->s, " OR "); + + if (alias) { + *alias = '\0'; + alias += 4; } - printf(" %-50s [%s]\n", nd->s, event_type_descriptors[type]); + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + nd->s, + alias, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[type], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); } - if (!strlist__empty(evt_name_list) && pager_in_use()) - printf("\n"); - strlist__delete(evt_name_list); } /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name) +void print_events(const struct print_callbacks *print_cb, void *print_state) { - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, name_only); - - print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, name_only); - print_tool_events(event_glob, name_only); - - print_hwcache_events(event_glob, name_only); - - print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag, deprecated, pmu_name); - - if (event_glob != NULL) - return; - - if (!name_only) { - printf(" %-50s [%s]\n", - "rNNN", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" %-50s [%s]\n", - "cpu/t1=v1[,t2=v2,t3 ...]/modifier", - event_type_descriptors[PERF_TYPE_RAW]); - if (pager_in_use()) - printf(" (see 'man perf-list' on how to encode it)\n\n"); - - printf(" %-50s [%s]\n", - "mem:[/len][:access]", - event_type_descriptors[PERF_TYPE_BREAKPOINT]); - if (pager_in_use()) - printf("\n"); - } - - print_tracepoint_events(NULL, NULL, name_only); - - print_sdt_events(NULL, NULL, name_only); - - metricgroup__print(true, true, NULL, name_only, details_flag, - pmu_name); - - print_libpfm_events(name_only, long_desc); + print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); + print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + + print_tool_events(print_cb, print_state); + + print_hwcache_events(print_cb, print_state); + + print_pmu_events(print_cb, print_state); + + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + "rNNN", + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_RAW], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + "cpu/t1=v1[,t2=v2,t3 ...]/modifier", + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_RAW], + "(see 'man perf-list' on how to encode it)", + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + "mem:[/len][:access]", + /*scale_unit=*/NULL, + /*event_alias=*/NULL, + /*deprecated=*/false, + event_type_descriptors[PERF_TYPE_BREAKPOINT], + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL, + /*metric_name=*/NULL, + /*metric_expr=*/NULL); + + print_tracepoint_events(print_cb, print_state); + + print_sdt_events(print_cb, print_state); + + metricgroup__print(print_cb, print_state); + + print_libpfm_events(print_cb, print_state); } diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 1da9910d83a6..c237e53c4487 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -2,21 +2,39 @@ #ifndef __PERF_PRINT_EVENTS_H #define __PERF_PRINT_EVENTS_H +#include #include struct event_symbol; -void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated, - const char *pmu_name); -int print_hwcache_events(const char *event_glob, bool name_only); -void print_sdt_events(const char *subsys_glob, const char *event_glob, - bool name_only); -void print_symbol_events(const char *event_glob, unsigned int type, - struct event_symbol *syms, unsigned int max, - bool name_only); -void print_tool_events(const char *event_glob, bool name_only); -void print_tracepoint_events(const char *subsys_glob, const char *event_glob, - bool name_only); +struct print_callbacks { + void (*print_start)(void *print_state); + void (*print_end)(void *print_state); + void (*print_event)(void *print_state, const char *topic, + const char *pmu_name, + const char *event_name, const char *event_alias, + const char *scale_unit, + bool deprecated, const char *event_type_desc, + const char *desc, const char *long_desc, + const char *encoding_desc, + const char *metric_name, const char *metric_expr); + void (*print_metric)(void *print_state, + const char *group, + const char *name, + const char *desc, + const char *long_desc, + const char *expr, + const char *unit); +}; + +/** Print all events, the default when no options are specified. */ +void print_events(const struct print_callbacks *print_cb, void *print_state); +int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state); +void print_sdt_events(const struct print_callbacks *print_cb, void *print_state); +void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, + unsigned int type, const struct event_symbol *syms, + unsigned int max); +void print_tool_events(const struct print_callbacks *print_cb, void *print_state); +void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); #endif /* __PERF_PRINT_EVENTS_H */ -- cgit v1.2.3 From 6ed249441a7d3ead8e81cc926e68d5e7ae031032 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 13:43:37 -0300 Subject: perf list: Add JSON output option Output events and metrics in a JSON format by overriding the print callbacks. Currently other command line options aren't supported and metrics are repeated once per metric group. Committer testing: $ perf list cache List of pre-defined events (to be used in -e or -M): L1-dcache-load-misses [Hardware cache event] L1-dcache-loads [Hardware cache event] L1-dcache-prefetches [Hardware cache event] L1-icache-load-misses [Hardware cache event] L1-icache-loads [Hardware cache event] branch-load-misses [Hardware cache event] branch-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-loads [Hardware cache event] iTLB-load-misses [Hardware cache event] iTLB-loads [Hardware cache event] $ perf list --json cache [ { "Unit": "cache", "EventName": "L1-dcache-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-dcache-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-dcache-prefetches", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-icache-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "L1-icache-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "branch-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "branch-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "dTLB-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "dTLB-loads", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "iTLB-load-misses", "EventType": "Hardware cache event" }, { "Unit": "cache", "EventName": "iTLB-loads", "EventType": "Hardware cache event" } ] $ Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/20221114210723.2749751-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 + tools/perf/builtin-list.c | 308 ++++++++++++++++++++++++++------- 2 files changed, 245 insertions(+), 67 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 44a819af573d..c5a3cb0f57c7 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -43,6 +43,10 @@ Print deprecated events. By default the deprecated events are hidden. Print PMU events and metrics limited to the specific PMU name. (e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom) +-j:: +--json:: +Output in JSON format. + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 0c84fdb3ad37..84fa2d050eac 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,6 +19,8 @@ #include "util/strlist.h" #include #include +#include +#include #include /** @@ -228,10 +230,176 @@ static void default_print_metric(void *ps, } } +struct json_print_state { + /** Should a separator be printed prior to the next item? */ + bool need_sep; +}; + +static void json_print_start(void *print_state __maybe_unused) +{ + printf("[\n"); +} + +static void json_print_end(void *ps) +{ + struct json_print_state *print_state = ps; + + printf("%s]\n", print_state->need_sep ? "\n" : ""); +} + +static void fix_escape_printf(const char *fmt, ...) +{ + va_list args; + char buf[2048]; + size_t buf_pos = 0; + + va_start(args, fmt); + for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) { + switch (fmt[fmt_pos]) { + case '%': { + const char *s = va_arg(args, const char*); + + fmt_pos++; + assert(fmt[fmt_pos] == 's'); + for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) { + switch (s[s_pos]) { + case '\\': + __fallthrough; + case '\"': + buf[buf_pos++] = '\\'; + assert(buf_pos < sizeof(buf)); + __fallthrough; + default: + buf[buf_pos++] = s[s_pos]; + assert(buf_pos < sizeof(buf)); + break; + } + } + break; + } + default: + buf[buf_pos++] = fmt[fmt_pos]; + assert(buf_pos < sizeof(buf)); + break; + } + } + va_end(args); + buf[buf_pos] = '\0'; + fputs(buf, stdout); +} + +static void json_print_event(void *ps, const char *pmu_name, const char *topic, + const char *event_name, const char *event_alias, + const char *scale_unit, + bool deprecated, const char *event_type_desc, + const char *desc, const char *long_desc, + const char *encoding_desc, + const char *metric_name, const char *metric_expr) +{ + struct json_print_state *print_state = ps; + bool need_sep = false; + + printf("%s{\n", print_state->need_sep ? ",\n" : ""); + print_state->need_sep = true; + if (pmu_name) { + fix_escape_printf("\t\"Unit\": \"%s\"", pmu_name); + need_sep = true; + } + if (topic) { + fix_escape_printf("%s\t\"Topic\": \"%s\"", need_sep ? ",\n" : "", topic); + need_sep = true; + } + if (event_name) { + fix_escape_printf("%s\t\"EventName\": \"%s\"", need_sep ? ",\n" : "", event_name); + need_sep = true; + } + if (event_alias && strlen(event_alias)) { + fix_escape_printf("%s\t\"EventAlias\": \"%s\"", need_sep ? ",\n" : "", event_alias); + need_sep = true; + } + if (scale_unit && strlen(scale_unit)) { + fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", + scale_unit); + need_sep = true; + } + if (event_type_desc) { + fix_escape_printf("%s\t\"EventType\": \"%s\"", need_sep ? ",\n" : "", + event_type_desc); + need_sep = true; + } + if (deprecated) { + fix_escape_printf("%s\t\"Deprecated\": \"%s\"", need_sep ? ",\n" : "", + deprecated ? "1" : "0"); + need_sep = true; + } + if (desc) { + fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + need_sep = true; + } + if (long_desc) { + fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + long_desc); + need_sep = true; + } + if (encoding_desc) { + fix_escape_printf("%s\t\"Encoding\": \"%s\"", need_sep ? ",\n" : "", encoding_desc); + need_sep = true; + } + if (metric_name) { + fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", metric_name); + need_sep = true; + } + if (metric_expr) { + fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", metric_expr); + need_sep = true; + } + printf("%s}", need_sep ? "\n" : ""); +} + +static void json_print_metric(void *ps __maybe_unused, const char *group, + const char *name, const char *desc, + const char *long_desc, const char *expr, + const char *unit) +{ + struct json_print_state *print_state = ps; + bool need_sep = false; + + printf("%s{\n", print_state->need_sep ? ",\n" : ""); + print_state->need_sep = true; + if (group) { + fix_escape_printf("\t\"MetricGroup\": \"%s\"", group); + need_sep = true; + } + if (name) { + fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", name); + need_sep = true; + } + if (expr) { + fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", expr); + need_sep = true; + } + if (unit) { + fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", unit); + need_sep = true; + } + if (desc) { + fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + need_sep = true; + } + if (long_desc) { + fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + long_desc); + need_sep = true; + } + printf("%s}", need_sep ? "\n" : ""); +} + int cmd_list(int argc, const char **argv) { int i, ret = 0; - struct print_state ps = {}; + struct print_state default_ps = {}; + struct print_state json_ps = {}; + void *ps = &default_ps; struct print_callbacks print_cb = { .print_start = default_print_start, .print_end = default_print_end, @@ -240,15 +408,17 @@ int cmd_list(int argc, const char **argv) }; const char *hybrid_name = NULL; const char *unit_name = NULL; + bool json = false; struct option list_options[] = { - OPT_BOOLEAN(0, "raw-dump", &ps.name_only, "Dump raw events"), - OPT_BOOLEAN('d', "desc", &ps.desc, + OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"), + OPT_BOOLEAN('j', "json", &json, "JSON encode events and metrics"), + OPT_BOOLEAN('d', "desc", &default_ps.desc, "Print extra event descriptions. --no-desc to not print."), - OPT_BOOLEAN('v', "long-desc", &ps.long_desc, + OPT_BOOLEAN('v', "long-desc", &default_ps.long_desc, "Print longer event descriptions."), - OPT_BOOLEAN(0, "details", &ps.detailed, + OPT_BOOLEAN(0, "details", &default_ps.detailed, "Print information on the perf event names and expressions used internally by events."), - OPT_BOOLEAN(0, "deprecated", &ps.deprecated, + OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated, "Print deprecated events."), OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."), @@ -272,28 +442,37 @@ int cmd_list(int argc, const char **argv) setup_pager(); - if (!ps.name_only) + if (!default_ps.name_only) setup_pager(); - ps.desc = !ps.long_desc; - ps.last_topic = strdup(""); - assert(ps.last_topic); - ps.visited_metrics = strlist__new(NULL, NULL); - assert(ps.visited_metrics); - if (unit_name) - ps.pmu_glob = strdup(unit_name); - else if (hybrid_name) { - ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); - if (!ps.pmu_glob) - pr_warning("WARNING: hybrid cputype is not supported!\n"); + if (json) { + print_cb = (struct print_callbacks){ + .print_start = json_print_start, + .print_end = json_print_end, + .print_event = json_print_event, + .print_metric = json_print_metric, + }; + ps = &json_ps; + } else { + default_ps.desc = !default_ps.long_desc; + default_ps.last_topic = strdup(""); + assert(default_ps.last_topic); + default_ps.visited_metrics = strlist__new(NULL, NULL); + assert(default_ps.visited_metrics); + if (unit_name) + default_ps.pmu_glob = strdup(unit_name); + else if (hybrid_name) { + default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name); + if (!default_ps.pmu_glob) + pr_warning("WARNING: hybrid cputype is not supported!\n"); + } } - - print_cb.print_start(&ps); + print_cb.print_start(ps); if (argc == 0) { - ps.metrics = true; - ps.metricgroups = true; - print_events(&print_cb, &ps); + default_ps.metrics = true; + default_ps.metricgroups = true; + print_events(&print_cb, ps); goto out; } @@ -301,80 +480,75 @@ int cmd_list(int argc, const char **argv) char *sep, *s; if (strcmp(argv[i], "tracepoint") == 0) - print_tracepoint_events(&print_cb, &ps); + print_tracepoint_events(&print_cb, ps); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { - print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, &ps); + print_tool_events(&print_cb, ps); } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(&print_cb, &ps); + print_hwcache_events(&print_cb, ps); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(&print_cb, &ps); + print_pmu_events(&print_cb, ps); else if (strcmp(argv[i], "sdt") == 0) - print_sdt_events(&print_cb, &ps); + print_sdt_events(&print_cb, ps); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) { - ps.metricgroups = false; - ps.metrics = true; - metricgroup__print(&print_cb, &ps); + default_ps.metricgroups = false; + default_ps.metrics = true; + metricgroup__print(&print_cb, ps); } else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) { - ps.metricgroups = true; - ps.metrics = false; - metricgroup__print(&print_cb, &ps); + default_ps.metricgroups = true; + default_ps.metrics = false; + metricgroup__print(&print_cb, ps); } else if ((sep = strchr(argv[i], ':')) != NULL) { - int sep_idx; - char *old_pmu_glob = ps.pmu_glob; + char *old_pmu_glob = default_ps.pmu_glob; - sep_idx = sep - argv[i]; - s = strdup(argv[i]); - if (s == NULL) { + default_ps.event_glob = strdup(argv[i]); + if (!default_ps.event_glob) { ret = -1; goto out; } - s[sep_idx] = '\0'; - ps.pmu_glob = s; - ps.event_glob = s + sep_idx + 1; - print_tracepoint_events(&print_cb, &ps); - print_sdt_events(&print_cb, &ps); - ps.metrics = true; - ps.metricgroups = true; - metricgroup__print(&print_cb, &ps); - free(s); - ps.pmu_glob = old_pmu_glob; + print_tracepoint_events(&print_cb, ps); + print_sdt_events(&print_cb, ps); + default_ps.metrics = true; + default_ps.metricgroups = true; + metricgroup__print(&print_cb, ps); + zfree(&default_ps.event_glob); + default_ps.pmu_glob = old_pmu_glob; } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { printf("Critical: Not enough memory! Trying to continue...\n"); continue; } - ps.event_glob = s; - print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE, + default_ps.event_glob = s; + print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX); - print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE, + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, &ps); - print_hwcache_events(&print_cb, &ps); - print_pmu_events(&print_cb, &ps); - print_tracepoint_events(&print_cb, &ps); - print_sdt_events(&print_cb, &ps); - ps.metrics = true; - ps.metricgroups = true; - metricgroup__print(&print_cb, &ps); + print_tool_events(&print_cb, ps); + print_hwcache_events(&print_cb, ps); + print_pmu_events(&print_cb, ps); + print_tracepoint_events(&print_cb, ps); + print_sdt_events(&print_cb, ps); + default_ps.metrics = true; + default_ps.metricgroups = true; + metricgroup__print(&print_cb, ps); free(s); } } out: - print_cb.print_end(&ps); - free(ps.pmu_glob); - free(ps.last_topic); - free(ps.last_metricgroups); - strlist__delete(ps.visited_metrics); + print_cb.print_end(ps); + free(default_ps.pmu_glob); + free(default_ps.last_topic); + free(default_ps.last_metricgroups); + strlist__delete(default_ps.visited_metrics); return ret; } -- cgit v1.2.3 From eadcab4c7a66e1df03d32da0db55d89fd9343fcc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:33 +0000 Subject: perf trace: Use macro RAW_SYSCALL_ARGS_NUM to replace number This patch defines a macro RAW_SYSCALL_ARGS_NUM to replace the open coded number '6'. Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3257da5cad23..22008a31684b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -88,6 +88,8 @@ # define F_LINUX_SPECIFIC_BASE 1024 #endif +#define RAW_SYSCALL_ARGS_NUM 6 + /* * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 */ @@ -108,7 +110,7 @@ struct syscall_fmt { const char *sys_enter, *sys_exit; } bpf_prog_name; - struct syscall_arg_fmt arg[6]; + struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM]; u8 nr_args; bool errpid; bool timeout; @@ -1229,7 +1231,7 @@ struct syscall { */ struct bpf_map_syscall_entry { bool enabled; - u16 string_args_len[6]; + u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; }; /* @@ -1661,7 +1663,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) { int idx; - if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) + if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0) nr_args = sc->fmt->nr_args; sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); @@ -1812,7 +1814,8 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) return -ENOMEM; if (IS_ERR(sc->tp_format)) -- cgit v1.2.3 From d4223e1776c30b2ce8d0e6eaadcbf696e60fca3c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:34 +0000 Subject: perf trace: Return error if a system call doesn't exist When a system call is not detected, the reason is either because the system call ID is out of scope or failure to find the corresponding path in the sysfs, trace__read_syscall_info() returns zero. Finally, without returning an error value it introduces confusion for the caller. This patch lets the function trace__read_syscall_info() to return -EEXIST when a system call doesn't exist. Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages") Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221121075237.127706-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 22008a31684b..bd5513b15cde 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1796,11 +1796,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) #endif sc = trace->syscalls.table + id; if (sc->nonexistent) - return 0; + return -EEXIST; if (name == NULL) { sc->nonexistent = true; - return 0; + return -EEXIST; } sc->name = name; -- cgit v1.2.3 From 03e9a5d8eb552a1bf692a9c8a5ecd50f4e428006 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:35 +0000 Subject: perf trace: Handle failure when trace point folder is missed On Arm64 a case is perf tools fails to find the corresponding trace point folder for system calls listed in the table 'syscalltbl_arm64', e.g. the generated system call table contains "lookup_dcookie" but we cannot find out the matched trace point folder for it. We need to figure out if there have any issue for the generated system call table, on the other hand, we need to handle the case when trace point folder is missed under sysfs, this patch sets the flag syscall::nonexistent as true and returns the error from trace__read_syscall_info(). Another problem is for trace__syscall_info(), it returns two different values if a system call doesn't exist: at the first time calling trace__syscall_info() it returns NULL when the system call doesn't exist, later if call trace__syscall_info() again for the same missed system call, it returns pointer of syscall. trace__syscall_info() checks the condition 'syscalls.table[id].name == NULL', but the name will be assigned in the first invoking even the system call is not found. So checking system call's name in trace__syscall_info() is not the right thing to do, this patch simply checks flag syscall::nonexistent to make decision if a system call exists or not, finally trace__syscall_info() returns the consistent result (NULL) if a system call doesn't existed. Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages") Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221121075237.127706-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bd5513b15cde..071e7598391f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1814,13 +1814,19 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } + /* + * Fails to read trace point format via sysfs node, so the trace point + * doesn't exist. Set the 'nonexistent' flag as true. + */ + if (IS_ERR(sc->tp_format)) { + sc->nonexistent = true; + return PTR_ERR(sc->tp_format); + } + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) return -ENOMEM; - if (IS_ERR(sc->tp_format)) - return PTR_ERR(sc->tp_format); - sc->args = sc->tp_format->format.fields; /* * We need to check and discard the first variable '__syscall_nr' @@ -2137,11 +2143,8 @@ static struct syscall *trace__syscall_info(struct trace *trace, (err = trace__read_syscall_info(trace, id)) != 0) goto out_cant_read; - if (trace->syscalls.table[id].name == NULL) { - if (trace->syscalls.table[id].nonexistent) - return NULL; + if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) goto out_cant_read; - } return &trace->syscalls.table[id]; -- cgit v1.2.3 From 9bc427a0613da358f56fe499c690d97ce5d1af26 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:36 +0000 Subject: perf augmented_raw_syscalls: Remove unused variable 'syscall' The local variable 'syscall' is not used anymore, remove it. Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/augmented_raw_syscalls.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 7dc24c9173a7..4203f92c063b 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -389,7 +389,6 @@ int sys_enter(struct syscall_enter_args *args) * initial, non-augmented raw_syscalls:sys_enter payload. */ unsigned int len = sizeof(augmented_args->args); - struct syscall *syscall; if (pid_filter__has(&pids_filtered, getpid())) return 0; -- cgit v1.2.3 From 8daf87f5922730468c98ae588573386042bc2992 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:37 +0000 Subject: perf trace: Remove unused bpf map 'syscalls' augmented_raw_syscalls.c defines the bpf map 'syscalls' which is initialized by perf tool in user space to indicate which system calls are enabled for tracing, on the other flip eBPF program relies on the map to filter out the trace events which are not enabled. The map also includes a field 'string_args_len[6]' which presents the string length if the corresponding argument is a string type. Now the map 'syscalls' is not used, bpf program doesn't use it as filter anymore, this is replaced by using the function bpf_tail_call() and PROG_ARRAY syscalls map. And we don't need to explicitly set the string length anymore, bpf_probe_read_str() is smart to copy the string and return string length. Therefore, it's safe to remove the bpf map 'syscalls'. To consolidate the code, this patch removes the definition of map 'syscalls' from augmented_raw_syscalls.c and drops code for using the map in the perf trace. Note, since function trace__set_ev_qualifier_bpf_filter() is removed, calling trace__init_syscall_bpf_progs() from it is also removed. We don't need to worry it because trace__init_syscall_bpf_progs() is still invoked from trace__init_syscalls_bpf_prog_array_maps() for initialization the system call's bpf program callback. After: # perf trace -e examples/bpf/augmented_raw_syscalls.c,open* --max-events 10 perf stat --quiet sleep 0.001 openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libdw.so.1", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind.so.8", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind-aarch64.so.8", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libcrypto.so.3", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libslang.so.2", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libperl.so.5.34", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 # perf trace -e examples/bpf/augmented_raw_syscalls.c --max-events 10 perf stat --quiet sleep 0.001 ... [continued]: execve()) = 0 brk(NULL) = 0xaaaab1d28000 faccessat(-100, "/etc/ld.so.preload", 4) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 close(3) = 0 openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3 read(3, 0xfffff33f70d0, 832) = 832 munmap(0xffffb5519000, 28672) = 0 munmap(0xffffb55b7000, 32880) = 0 mprotect(0xffffb55a6000, 61440, PROT_NONE) = 0 Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 101 ----------------------- tools/perf/examples/bpf/augmented_raw_syscalls.c | 17 ---- 2 files changed, 118 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 071e7598391f..543c379d2a57 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -122,7 +122,6 @@ struct trace { struct syscalltbl *sctbl; struct { struct syscall *table; - struct bpf_map *map; struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY struct bpf_map *sys_enter, *sys_exit; @@ -1224,16 +1223,6 @@ struct syscall { struct syscall_arg_fmt *arg_fmt; }; -/* - * Must match what is in the BPF program: - * - * tools/perf/examples/bpf/augmented_raw_syscalls.c - */ -struct bpf_map_syscall_entry { - bool enabled; - u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; -}; - /* * We need to have this 'calculated' boolean because in some cases we really * don't know what is the duration of a syscall, for instance, when we start @@ -3259,7 +3248,6 @@ static void trace__set_bpf_map_filtered_pids(struct trace *trace) static void trace__set_bpf_map_syscalls(struct trace *trace) { - trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); } @@ -3339,80 +3327,6 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog); } -static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry) -{ - struct syscall *sc = trace__syscall_info(trace, NULL, id); - int arg = 0; - - if (sc == NULL) - goto out; - - for (; arg < sc->nr_args; ++arg) { - entry->string_args_len[arg] = 0; - if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) { - /* Should be set like strace -s strsize */ - entry->string_args_len[arg] = PATH_MAX; - } - } -out: - for (; arg < 6; ++arg) - entry->string_args_len[arg] = 0; -} -static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) -{ - int fd = bpf_map__fd(trace->syscalls.map); - struct bpf_map_syscall_entry value = { - .enabled = !trace->not_ev_qualifier, - }; - int err = 0; - size_t i; - - for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { - int key = trace->ev_qualifier_ids.entries[i]; - - if (value.enabled) { - trace__init_bpf_map_syscall_args(trace, key, &value); - trace__init_syscall_bpf_progs(trace, key); - } - - err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); - if (err) - break; - } - - return err; -} - -static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled) -{ - int fd = bpf_map__fd(trace->syscalls.map); - struct bpf_map_syscall_entry value = { - .enabled = enabled, - }; - int err = 0, key; - - for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { - if (enabled) - trace__init_bpf_map_syscall_args(trace, key, &value); - - err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); - if (err) - break; - } - - return err; -} - -static int trace__init_syscalls_bpf_map(struct trace *trace) -{ - bool enabled = true; - - if (trace->ev_qualifier_ids.nr) - enabled = trace->not_ev_qualifier; - - return __trace__init_syscalls_bpf_map(trace, enabled); -} - static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) { struct tep_format_field *field, *candidate_field; @@ -3627,16 +3541,6 @@ static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused) { } -static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) -{ - return 0; -} - -static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused) -{ - return 0; -} - static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused, const char *name __maybe_unused) { @@ -3670,8 +3574,6 @@ static bool trace__only_augmented_syscalls_evsels(struct trace *trace) static int trace__set_ev_qualifier_filter(struct trace *trace) { - if (trace->syscalls.map) - return trace__set_ev_qualifier_bpf_filter(trace); if (trace->syscalls.events.sys_enter) return trace__set_ev_qualifier_tp_filter(trace); return 0; @@ -4045,9 +3947,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_mem; - if (trace->syscalls.map) - trace__init_syscalls_bpf_map(trace); - if (trace->syscalls.prog_array.sys_enter) trace__init_syscalls_bpf_prog_array_maps(trace); diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 4203f92c063b..9a03189d33d3 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -37,23 +37,6 @@ struct __augmented_syscalls__ { __uint(max_entries, __NR_CPUS__); } __augmented_syscalls__ SEC(".maps"); -/* - * string_args_len: one per syscall arg, 0 means not a string or don't copy it, - * PATH_MAX for copying everything, any other value to limit - * it a la 'strace -s strsize'. - */ -struct syscall { - bool enabled; - __u16 string_args_len[6]; -}; - -struct syscalls { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, struct syscall); - __uint(max_entries, 512); -} syscalls SEC(".maps"); - /* * What to augment at entry? * -- cgit v1.2.3 From 7d54a4acd8c1de3ea70d31424757dcdb7f0a231a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 21 Nov 2022 15:57:47 +0530 Subject: perf test: Skip watchpoint tests if no watchpoints available On IBM Power9, perf watchpoint tests fail since no hardware breakpoints are available. Detect this by checking the error returned by perf_event_open() and skip the tests in that case. Reported-by: Disha Goel Signed-off-by: Naveen N. Rao Acked-by: Ian Rogers Reviewed-by: Kajol Jain Tested-by: Kajol Jain Link: https://lore.kernel.org/r/20221121102747.208289-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Ravi Bangoria Cc: Arnaldo Carvalho de Melo Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org --- tools/perf/tests/wp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 56455da30341..cc8719609b19 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -59,8 +59,10 @@ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) get__perf_event_attr(&attr, wp_type, wp_addr, wp_len); fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); - if (fd < 0) + if (fd < 0) { + fd = -errno; pr_debug("failed opening event %x\n", attr.bp_type); + } return fd; } @@ -77,7 +79,7 @@ static int test__wp_ro(struct test_suite *test __maybe_unused, fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; tmp = data1; WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1); @@ -101,7 +103,7 @@ static int test__wp_wo(struct test_suite *test __maybe_unused, fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; tmp = data1; WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0); @@ -126,7 +128,7 @@ static int test__wp_rw(struct test_suite *test __maybe_unused, fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; tmp = data1; WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1); @@ -150,7 +152,7 @@ static int test__wp_modify(struct test_suite *test __maybe_unused, int subtest _ fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); if (fd < 0) - return -1; + return fd == -ENODEV ? TEST_SKIP : -1; data1 = tmp; WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1); -- cgit v1.2.3 From 19030564ab116757e3270a567fd9d5b20b411d74 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 18 Nov 2022 16:27:50 -0800 Subject: perf inject: Set PERF_RECORD_MISC_BUILD_ID_SIZE With perf inject -b, it synthesizes build-id event for DSOs. But it missed to set the size and resulted in having trailing zeros. As perf record sets the size in write_build_id(), let's set the size here as well. Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221119002750.1568027-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cccd293b5312..0645795ff080 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2218,8 +2218,9 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); + ev.build_id.size = pos->bid.size; ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; - ev.build_id.header.misc = misc; + ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; ev.build_id.pid = machine->pid; ev.build_id.header.size = sizeof(ev.build_id) + len; memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); -- cgit v1.2.3 From 2e9f5bda2f036aa38312fbeb99ff7e19c0221578 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 22 Nov 2022 09:31:21 +0100 Subject: perf test: Fix record test on KVM guests Using precise flag with br_inst_retired.near_call causes the test fail on KVM guests, even when the guests have PMU forwarding enabled and the event itself is supported. Remove the precise flag in order to make the test work on KVM guests. Signed-off-by: Michael Petlan Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20221122083121.6012-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 4dff89e3a3fd..4fbc74805d52 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -83,7 +83,7 @@ test_register_capture() { echo "Register capture test [Skipped missing registers]" return fi - if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ + if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call \ -c 1000 --per-thread ${testprog} 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ | grep -q "DI:" -- cgit v1.2.3 From 7b7c22ccdf275018f715af95b7d052e8d4c6c690 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 21 Nov 2022 11:30:18 +0000 Subject: MAINTAINERS: Update John Garry's email address for arm64 perf tooling Update my address. Signed-off-by: John Garry Acked-by: Will Deacon Cc: Ian Rogers Link: https://lore.kernel.org/r/20221121113018.1899426-1-john.g.garry@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2585e7edc335..170f1763557a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16200,7 +16200,7 @@ F: tools/lib/perf/ F: tools/perf/ PERFORMANCE EVENTS TOOLING ARM64 -R: John Garry +R: John Garry R: Will Deacon R: James Clark R: Mike Leach -- cgit v1.2.3 From c66a36af7ba3a628453da1d91f42ee64fb36ea5a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 18 Nov 2022 11:01:09 -0800 Subject: perf lock contention: Do not use BPF task local storage It caused some troubles when a lock inside kmalloc is contended because task local storage would allocate memory using kmalloc. It'd create a recusion and even crash in my system. There could be a couple of workarounds but I think the simplest one is to use a pre-allocated hash map. We could fix the task local storage to use the safe BPF allocator, but it takes time so let's change this until it happens actually. Signed-off-by: Namhyung Kim Acked-by: Martin KaFai Lau Cc: Adrian Hunter Cc: Blake Jones Cc: Chris Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221118190109.1512674-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 1 + tools/perf/util/bpf_skel/lock_contention.bpf.c | 34 +++++++++++++++++--------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 0deec1178778..4db9ad3d50c4 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -39,6 +39,7 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64)); bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); if (target__has_cpu(target)) ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 1bb8628e7c9f..9681cb59b0df 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -40,10 +40,10 @@ struct { /* maintain timestamp at the beginning of contention */ struct { - __uint(type, BPF_MAP_TYPE_TASK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); __type(value, struct tstamp_data); + __uint(max_entries, MAX_ENTRIES); } tstamp SEC(".maps"); /* actual lock contention statistics */ @@ -103,18 +103,28 @@ static inline int can_record(void) SEC("tp_btf/contention_begin") int contention_begin(u64 *ctx) { - struct task_struct *curr; + __u32 pid; struct tstamp_data *pelem; if (!enabled || !can_record()) return 0; - curr = bpf_get_current_task_btf(); - pelem = bpf_task_storage_get(&tstamp, curr, NULL, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!pelem || pelem->lock) + pid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &pid); + if (pelem && pelem->lock) return 0; + if (pelem == NULL) { + struct tstamp_data zero = {}; + + bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY); + pelem = bpf_map_lookup_elem(&tstamp, &pid); + if (pelem == NULL) { + lost++; + return 0; + } + } + pelem->timestamp = bpf_ktime_get_ns(); pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; @@ -128,7 +138,7 @@ int contention_begin(u64 *ctx) SEC("tp_btf/contention_end") int contention_end(u64 *ctx) { - struct task_struct *curr; + __u32 pid; struct tstamp_data *pelem; struct contention_key key; struct contention_data *data; @@ -137,8 +147,8 @@ int contention_end(u64 *ctx) if (!enabled) return 0; - curr = bpf_get_current_task_btf(); - pelem = bpf_task_storage_get(&tstamp, curr, NULL, 0); + pid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &pid); if (!pelem || pelem->lock != ctx[0]) return 0; @@ -156,7 +166,7 @@ int contention_end(u64 *ctx) }; bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST); - pelem->lock = 0; + bpf_map_delete_elem(&tstamp, &pid); return 0; } @@ -169,7 +179,7 @@ int contention_end(u64 *ctx) if (data->min_time > duration) data->min_time = duration; - pelem->lock = 0; + bpf_map_delete_elem(&tstamp, &pid); return 0; } -- cgit v1.2.3 From 15792642db6946890416a6d1616b03ab25c26fa7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:54 -0800 Subject: perf stat: Fix cgroup display in JSON output It missed the 'else' keyword after checking json output mode. Fixes: 41cb875242e71bf1 ("perf stat: Split print_cgroup() function") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f5501760ff2e..46e90f0bb423 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -175,7 +175,7 @@ static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) if (config->json_output) print_cgroup_json(config, cgrp_name); - if (config->csv_output) + else if (config->csv_output) print_cgroup_csv(config, cgrp_name); else print_cgroup_std(config, cgrp_name); -- cgit v1.2.3 From 6d74ed369d4342bb7d4fecbc1cde6061b5bb5604 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:55 -0800 Subject: perf stat: Move summary prefix printing logic in CSV output It matches to the prefix (interval timestamp), so better to have them together. No functional change intended. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 46e90f0bb423..d86f2f8e020d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -713,11 +713,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nl = config->metric_only ? new_line_metric : new_line_std; } - if (!config->no_csv_summary && config->csv_output && - config->summary && !config->interval && !config->metric_only) { - fprintf(config->output, "%16s%s", "summary", config->csv_sep); - } - if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { pm(config, &os, NULL, "", "", 0); @@ -828,8 +823,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - if (prefix && !metric_only) - fprintf(output, "%s", prefix); + if (!metric_only) { + if (prefix) + fprintf(output, "%s", prefix); + else if (config->summary && config->csv_output && + !config->no_csv_summary && !config->interval) + fprintf(output, "%16s%s", "summary", config->csv_sep); + } uval = val * counter->scale; -- cgit v1.2.3 From 8e55ae24c08fd5bf39b632df72122a7c2591c03c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:56 -0800 Subject: perf stat: Do not align time prefix in CSV output We don't care about the alignment in the CSV output as it's intended for machine processing. Let's get rid of it to make the output more compact. Before: # perf stat -a --summary -I 1 -x, true 0.001149309,219.20,msec,cpu-clock,219322251,100.00,219.200,CPUs utilized 0.001149309,144,,context-switches,219241902,100.00,656.935,/sec 0.001149309,38,,cpu-migrations,219173705,100.00,173.358,/sec 0.001149309,61,,page-faults,219093635,100.00,278.285,/sec 0.001149309,10679310,,cycles,218746228,100.00,0.049,GHz 0.001149309,6288296,,instructions,218589869,100.00,0.59,insn per cycle 0.001149309,1386904,,branches,218428851,100.00,6.327,M/sec 0.001149309,56863,,branch-misses,218219951,100.00,4.10,of all branches summary,219.20,msec,cpu-clock,219322251,100.00,20.025,CPUs utilized summary,144,,context-switches,219241902,100.00,656.935,/sec summary,38,,cpu-migrations,219173705,100.00,173.358,/sec summary,61,,page-faults,219093635,100.00,278.285,/sec summary,10679310,,cycles,218746228,100.00,0.049,GHz summary,6288296,,instructions,218589869,100.00,0.59,insn per cycle summary,1386904,,branches,218428851,100.00,6.327,M/sec summary,56863,,branch-misses,218219951,100.00,4.10,of all branches After: 0.001148449,224.75,msec,cpu-clock,224870589,100.00,224.747,CPUs utilized 0.001148449,176,,context-switches,224775564,100.00,783.103,/sec 0.001148449,38,,cpu-migrations,224707428,100.00,169.079,/sec 0.001148449,61,,page-faults,224629326,100.00,271.416,/sec 0.001148449,12172071,,cycles,224266368,100.00,0.054,GHz 0.001148449,6901907,,instructions,224108764,100.00,0.57,insn per cycle 0.001148449,1515655,,branches,223946693,100.00,6.744,M/sec 0.001148449,70027,,branch-misses,223735385,100.00,4.62,of all branches summary,224.75,msec,cpu-clock,224870589,100.00,21.066,CPUs utilized summary,176,,context-switches,224775564,100.00,783.103,/sec summary,38,,cpu-migrations,224707428,100.00,169.079,/sec summary,61,,page-faults,224629326,100.00,271.416,/sec summary,12172071,,cycles,224266368,100.00,0.054,GHz summary,6901907,,instructions,224108764,100.00,0.57,insn per cycle summary,1515655,,branches,223946693,100.00,6.744,M/sec summary,70027,,branch-misses,223735385,100.00,4.62,of all branches Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d86f2f8e020d..15c88b9b5aa3 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -828,7 +828,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); else if (config->summary && config->csv_output && !config->no_csv_summary && !config->interval) - fprintf(output, "%16s%s", "summary", config->csv_sep); + fprintf(output, "%s%s", "summary", config->csv_sep); } uval = val * counter->scale; @@ -1078,9 +1078,12 @@ static void prepare_interval(struct perf_stat_config *config, if (config->iostat_run) return; - if (!config->json_output) - sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, + if (config->csv_output) + sprintf(prefix, "%lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); + else if (!config->json_output) + sprintf(prefix, "%6lu.%09lu ", (unsigned long) ts->tv_sec, + ts->tv_nsec); else if (!config->metric_only) sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) ts->tv_sec, ts->tv_nsec); -- cgit v1.2.3 From a7ec1dd2d744208ca814ebde8c97f94e041625ef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:57 -0800 Subject: perf stat: Use scnprintf() in prepare_interval() It should not use sprintf() anymore. Let's pass the buffer size and use the safer scnprintf() instead. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 15c88b9b5aa3..744b7a40f59a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1073,23 +1073,23 @@ static void print_metric_headers(struct perf_stat_config *config, } static void prepare_interval(struct perf_stat_config *config, - char *prefix, struct timespec *ts) + char *prefix, size_t len, struct timespec *ts) { if (config->iostat_run) return; if (config->csv_output) - sprintf(prefix, "%lu.%09lu%s", (unsigned long) ts->tv_sec, - ts->tv_nsec, config->csv_sep); + scnprintf(prefix, len, "%lu.%09lu%s", + (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); else if (!config->json_output) - sprintf(prefix, "%6lu.%09lu ", (unsigned long) ts->tv_sec, - ts->tv_nsec); + scnprintf(prefix, len, "%6lu.%09lu ", + (unsigned long) ts->tv_sec, ts->tv_nsec); else if (!config->metric_only) - sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) - ts->tv_sec, ts->tv_nsec); + scnprintf(prefix, len, "{\"interval\" : %lu.%09lu, ", + (unsigned long) ts->tv_sec, ts->tv_nsec); else - sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) - ts->tv_sec, ts->tv_nsec); + scnprintf(prefix, len, "{\"interval\" : %lu.%09lu}", + (unsigned long) ts->tv_sec, ts->tv_nsec); } static void print_header_interval_std(struct perf_stat_config *config, @@ -1390,7 +1390,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (interval) { prefix = buf; - prepare_interval(config, prefix, ts); + prepare_interval(config, buf, sizeof(buf), ts); } print_header(config, _target, evlist, argc, argv); -- cgit v1.2.3 From f123b2d84ecec9a3c551e745f43d36bba1d69e1c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:58 -0800 Subject: perf stat: Remove prefix argument in print_metric_headers() It always passes a whitespace to the function, thus we can just add it to the function body. Furthermore, it's only used in the normal output mode. Well, actually CSV used it but it doesn't need to since we don't care about the indentation or alignment in the CSV output. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 744b7a40f59a..deed6ccf072f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -996,10 +996,9 @@ static void print_no_aggr_metric(struct perf_stat_config *config, } static void print_metric_headers_std(struct perf_stat_config *config, - const char *prefix, bool no_indent) + bool no_indent) { - if (prefix) - fprintf(config->output, "%s", prefix); + fputc(' ', config->output); if (!no_indent) { int len = aggr_header_lens[config->aggr_mode]; @@ -1012,11 +1011,8 @@ static void print_metric_headers_std(struct perf_stat_config *config, } static void print_metric_headers_csv(struct perf_stat_config *config, - const char *prefix, bool no_indent __maybe_unused) { - if (prefix) - fprintf(config->output, "%s", prefix); if (config->interval) fputs("time,", config->output); if (!config->iostat_run) @@ -1024,7 +1020,6 @@ static void print_metric_headers_csv(struct perf_stat_config *config, } static void print_metric_headers_json(struct perf_stat_config *config, - const char *prefix __maybe_unused, bool no_indent __maybe_unused) { if (config->interval) @@ -1032,8 +1027,7 @@ static void print_metric_headers_json(struct perf_stat_config *config, } static void print_metric_headers(struct perf_stat_config *config, - struct evlist *evlist, - const char *prefix, bool no_indent) + struct evlist *evlist, bool no_indent) { struct evsel *counter; struct outstate os = { @@ -1047,11 +1041,11 @@ static void print_metric_headers(struct perf_stat_config *config, }; if (config->json_output) - print_metric_headers_json(config, prefix, no_indent); + print_metric_headers_json(config, no_indent); else if (config->csv_output) - print_metric_headers_csv(config, prefix, no_indent); + print_metric_headers_csv(config, no_indent); else - print_metric_headers_std(config, prefix, no_indent); + print_metric_headers_std(config, no_indent); if (config->iostat_run) iostat_print_header_prefix(config); @@ -1132,7 +1126,7 @@ static void print_header_interval_std(struct perf_stat_config *config, } if (config->metric_only) - print_metric_headers(config, evlist, " ", true); + print_metric_headers(config, evlist, true); else fprintf(output, " %*s %*s events\n", COUNTS_LEN, "counts", config->unit_width, "unit"); @@ -1168,7 +1162,7 @@ static void print_header_std(struct perf_stat_config *config, fprintf(output, ":\n\n"); if (config->metric_only) - print_metric_headers(config, evlist, " ", false); + print_metric_headers(config, evlist, false); } static void print_header_csv(struct perf_stat_config *config, @@ -1178,7 +1172,7 @@ static void print_header_csv(struct perf_stat_config *config, const char **argv __maybe_unused) { if (config->metric_only) - print_metric_headers(config, evlist, " ", true); + print_metric_headers(config, evlist, true); } static void print_header_json(struct perf_stat_config *config, struct target *_target __maybe_unused, @@ -1187,7 +1181,7 @@ static void print_header_json(struct perf_stat_config *config, const char **argv __maybe_unused) { if (config->metric_only) - print_metric_headers(config, evlist, " ", true); + print_metric_headers(config, evlist, true); } static void print_header(struct perf_stat_config *config, -- cgit v1.2.3 From ce551ec923445b821893bddfb13c116d7e8fe454 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:01:59 -0800 Subject: perf stat: Remove metric_only argument in print_counter_aggrdata() It already passes the stat_config argument, then it can find the value in the config. No need to pass it separately. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index deed6ccf072f..b8432c0a0ec3 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -804,7 +804,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix, bool metric_only) + char *prefix) { FILE *output = config->output; u64 ena, run, val; @@ -813,6 +813,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct perf_stat_aggr *aggr = &ps->aggr[s]; struct aggr_cpu_id id = config->aggr_map->map[s]; double avg = aggr->counts.val; + bool metric_only = config->metric_only; if (counter->supported && aggr->nr == 0) return; @@ -875,7 +876,6 @@ static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - bool metric_only = config->metric_only; struct evsel *counter; int s; @@ -893,8 +893,7 @@ static void print_aggr(struct perf_stat_config *config, if (counter->merged_stat) continue; - print_counter_aggrdata(config, counter, s, prefix, - metric_only); + print_counter_aggrdata(config, counter, s, prefix); } print_metric_end(config); } @@ -904,7 +903,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - bool metric_only = config->metric_only; struct evsel *counter, *evsel; struct cgroup *cgrp = NULL; int s; @@ -928,8 +926,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, if (counter->cgrp != cgrp) continue; - print_counter_aggrdata(config, counter, s, prefix, - metric_only); + print_counter_aggrdata(config, counter, s, prefix); } print_metric_end(config); } @@ -939,7 +936,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config, static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - bool metric_only = config->metric_only; int s; /* AGGR_THREAD doesn't have config->aggr_get_id */ @@ -950,8 +946,7 @@ static void print_counter(struct perf_stat_config *config, return; for (s = 0; s < config->aggr_map->nr; s++) { - print_counter_aggrdata(config, counter, s, prefix, - metric_only); + print_counter_aggrdata(config, counter, s, prefix); } } @@ -1339,7 +1334,7 @@ static void print_percore(struct perf_stat_config *config, if (found) continue; - print_counter_aggrdata(config, counter, s, prefix, metric_only); + print_counter_aggrdata(config, counter, s, prefix); core_map->map[c++] = core_id; } -- cgit v1.2.3 From 991991ab99635d9e368f9671fa8c30ec1113042c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:00 -0800 Subject: perf stat: Pass const char *prefix to display routines This is a minor cleanup and preparation for the later change. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/iostat.h | 2 +- tools/perf/util/stat-display.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h index 23c1c46a331a..c22688f87cb2 100644 --- a/tools/perf/util/iostat.h +++ b/tools/perf/util/iostat.h @@ -28,7 +28,7 @@ enum iostat_mode_t { extern enum iostat_mode_t iostat_mode; -typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *); +typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, const char *); int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config); int iostat_parse(const struct option *opt, const char *str, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index b8432c0a0ec3..d2894a519d61 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -675,7 +675,7 @@ static bool is_mixed_hw_group(struct evsel *counter) static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise, + const char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; @@ -804,7 +804,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix) + const char *prefix) { FILE *output = config->output; u64 ena, run, val; @@ -843,7 +843,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, static void print_metric_begin(struct perf_stat_config *config, struct evlist *evlist, - char *prefix, int aggr_idx, + const char *prefix, int aggr_idx, struct cgroup *cgrp) { struct perf_stat_aggr *aggr; @@ -874,7 +874,7 @@ static void print_metric_end(struct perf_stat_config *config) static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { struct evsel *counter; int s; @@ -901,7 +901,7 @@ static void print_aggr(struct perf_stat_config *config, static void print_aggr_cgroup(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { struct evsel *counter, *evsel; struct cgroup *cgrp = NULL; @@ -934,7 +934,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, } static void print_counter(struct perf_stat_config *config, - struct evsel *counter, char *prefix) + struct evsel *counter, const char *prefix) { int s; @@ -952,7 +952,7 @@ static void print_counter(struct perf_stat_config *config, static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { int all_idx; struct perf_cpu cpu; @@ -1301,7 +1301,7 @@ static void print_footer(struct perf_stat_config *config) } static void print_percore(struct perf_stat_config *config, - struct evsel *counter, char *prefix) + struct evsel *counter, const char *prefix) { bool metric_only = config->metric_only; FILE *output = config->output; @@ -1345,7 +1345,7 @@ static void print_percore(struct perf_stat_config *config, } static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, - char *prefix) + const char *prefix) { struct cgroup *cgrp = NULL; struct evsel *counter; -- cgit v1.2.3 From 92ccf7f11d68fa55dc82cb7ae01043b3691918cf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:01 -0800 Subject: perf stat: Use 'struct outstate' in evlist__print_counters() This is a preparation for the later cleanup. No functional changes intended. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d2894a519d61..70aebf359e16 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1372,13 +1372,16 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf bool metric_only = config->metric_only; int interval = config->interval; struct evsel *counter; - char buf[64], *prefix = NULL; + char buf[64]; + struct outstate os = { + .fh = config->output, + }; if (config->iostat_run) evlist->selected = evlist__first(evlist); if (interval) { - prefix = buf; + os.prefix = buf; prepare_interval(config, buf, sizeof(buf), ts); } @@ -1390,35 +1393,35 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_SOCKET: case AGGR_NODE: if (config->cgroup_list) - print_aggr_cgroup(config, evlist, prefix); + print_aggr_cgroup(config, evlist, os.prefix); else - print_aggr(config, evlist, prefix); + print_aggr(config, evlist, os.prefix); break; case AGGR_THREAD: case AGGR_GLOBAL: if (config->iostat_run) { - iostat_print_counters(evlist, config, ts, prefix = buf, + iostat_print_counters(evlist, config, ts, buf, print_counter); } else if (config->cgroup_list) { - print_cgroup_counter(config, evlist, prefix); + print_cgroup_counter(config, evlist, os.prefix); } else { - print_metric_begin(config, evlist, prefix, + print_metric_begin(config, evlist, os.prefix, /*aggr_idx=*/0, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { - print_counter(config, counter, prefix); + print_counter(config, counter, os.prefix); } print_metric_end(config); } break; case AGGR_NONE: if (metric_only) - print_no_aggr_metric(config, evlist, prefix); + print_no_aggr_metric(config, evlist, os.prefix); else { evlist__for_each_entry(evlist, counter) { if (counter->percore) - print_percore(config, counter, prefix); + print_percore(config, counter, os.prefix); else - print_counter(config, counter, prefix); + print_counter(config, counter, os.prefix); } } break; -- cgit v1.2.3 From 922ae948c429a9b396761f2071eebf6a19688dd2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:02 -0800 Subject: perf stat: Pass 'struct outstate' to print_metric_begin() It passes prefix and cgroup pointers but the outstate already has them. Let's pass the outstate pointer instead. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 50 +++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 70aebf359e16..3ed63061d6f8 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -843,8 +843,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, static void print_metric_begin(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix, int aggr_idx, - struct cgroup *cgrp) + struct outstate *os, int aggr_idx) { struct perf_stat_aggr *aggr; struct aggr_cpu_id id; @@ -853,15 +852,15 @@ static void print_metric_begin(struct perf_stat_config *config, if (!config->metric_only) return; - if (prefix) - fprintf(config->output, "%s", prefix); + if (os->prefix) + fprintf(config->output, "%s", os->prefix); evsel = evlist__first(evlist); id = config->aggr_map->map[aggr_idx]; aggr = &evsel->stats->aggr[aggr_idx]; aggr_printout(config, evsel, id, aggr->nr); - print_cgroup(config, cgrp); + print_cgroup(config, os->cgrp); } static void print_metric_end(struct perf_stat_config *config) @@ -877,6 +876,9 @@ static void print_aggr(struct perf_stat_config *config, const char *prefix) { struct evsel *counter; + struct outstate os = { + .prefix = prefix, + }; int s; if (!config->aggr_map || !config->aggr_get_id) @@ -887,7 +889,7 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL); + print_metric_begin(config, evlist, &os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) @@ -904,26 +906,28 @@ static void print_aggr_cgroup(struct perf_stat_config *config, const char *prefix) { struct evsel *counter, *evsel; - struct cgroup *cgrp = NULL; + struct outstate os = { + .prefix = prefix, + }; int s; if (!config->aggr_map || !config->aggr_get_id) return; evlist__for_each_entry(evlist, evsel) { - if (cgrp == evsel->cgrp) + if (os.cgrp == evsel->cgrp) continue; - cgrp = evsel->cgrp; + os.cgrp = evsel->cgrp; for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, prefix, s, cgrp); + print_metric_begin(config, evlist, &os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - if (counter->cgrp != cgrp) + if (counter->cgrp != os.cgrp) continue; print_counter_aggrdata(config, counter, s, prefix); @@ -956,6 +960,9 @@ static void print_no_aggr_metric(struct perf_stat_config *config, { int all_idx; struct perf_cpu cpu; + struct outstate os = { + .prefix = prefix, + }; perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) { struct evsel *counter; @@ -973,8 +980,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - print_metric_begin(config, evlist, prefix, - counter_idx, /*cgrp=*/NULL); + print_metric_begin(config, evlist, &os, counter_idx); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -1347,22 +1353,23 @@ static void print_percore(struct perf_stat_config *config, static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, const char *prefix) { - struct cgroup *cgrp = NULL; struct evsel *counter; + struct outstate os = { + .prefix = prefix, + }; evlist__for_each_entry(evlist, counter) { - if (cgrp != counter->cgrp) { - if (cgrp != NULL) + if (os.cgrp != counter->cgrp) { + if (os.cgrp != NULL) print_metric_end(config); - cgrp = counter->cgrp; - print_metric_begin(config, evlist, prefix, - /*aggr_idx=*/0, cgrp); + os.cgrp = counter->cgrp; + print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); } print_counter(config, counter, prefix); } - if (cgrp) + if (os.cgrp) print_metric_end(config); } @@ -1405,8 +1412,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf } else if (config->cgroup_list) { print_cgroup_counter(config, evlist, os.prefix); } else { - print_metric_begin(config, evlist, os.prefix, - /*aggr_idx=*/0, /*cgrp=*/NULL); + print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); evlist__for_each_entry(evlist, counter) { print_counter(config, counter, os.prefix); } -- cgit v1.2.3 From e7f4da312259e61877ae8e26d216993c4128bddc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:03 -0800 Subject: perf stat: Pass struct outstate to printout() The printout() takes a lot of arguments and sets an outstate with the value. Instead, we can fill the outstate first and then pass it to reduce the number of arguments. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 3ed63061d6f8..dd190f71e933 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -673,22 +673,15 @@ static bool is_mixed_hw_group(struct evsel *counter) return false; } -static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, - struct evsel *counter, double uval, - const char *prefix, u64 run, u64 ena, double noise, +static void printout(struct perf_stat_config *config, struct outstate *os, + double uval, u64 run, u64 ena, double noise, struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; - struct outstate os = { - .fh = config->output, - .prefix = prefix ? prefix : "", - .id = id, - .nr = nr, - .evsel = counter, - }; print_metric_t pm; new_line_t nl; bool ok = true; + struct evsel *counter = os->evsel; if (config->csv_output) { static const int aggr_fields[AGGR_MAX] = { @@ -704,7 +697,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm = config->metric_only ? print_metric_only_csv : print_metric_csv; nl = config->metric_only ? new_line_metric : new_line_csv; - os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); + os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; nl = config->metric_only ? new_line_metric : new_line_json; @@ -715,7 +708,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, &os, NULL, "", "", 0); + pm(config, os, NULL, "", "", 0); return; } @@ -732,11 +725,11 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int out.print_metric = pm; out.new_line = nl; - out.ctx = &os; + out.ctx = os; out.force_header = false; if (!config->metric_only) { - abs_printout(config, id, nr, counter, uval, ok); + abs_printout(config, os->id, os->nr, counter, uval, ok); print_noise(config, counter, noise, /*before_metric=*/true); print_running(config, run, ena, /*before_metric=*/true); @@ -814,6 +807,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_cpu_id id = config->aggr_map->map[s]; double avg = aggr->counts.val; bool metric_only = config->metric_only; + struct outstate os = { + .fh = config->output, + .prefix = prefix ? prefix : "", + .id = id, + .nr = aggr->nr, + .evsel = counter, + }; if (counter->supported && aggr->nr == 0) return; @@ -834,8 +834,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; - printout(config, id, aggr->nr, counter, uval, - prefix, run, ena, avg, &rt_stat, s); + printout(config, &os, uval, run, ena, avg, &rt_stat, s); if (!metric_only) fputc('\n', output); @@ -971,14 +970,14 @@ static void print_no_aggr_metric(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { u64 ena, run, val; double uval; - struct aggr_cpu_id id; struct perf_stat_evsel *ps = counter->stats; int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); if (counter_idx < 0) continue; - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + os.evsel = counter; + os.id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { print_metric_begin(config, evlist, &os, counter_idx); first = false; @@ -988,8 +987,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; - printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat, counter_idx); + printout(config, &os, uval, run, ena, 1.0, &rt_stat, counter_idx); } if (!first) print_metric_end(config); -- cgit v1.2.3 From 01577597493dc8bded8a5880fbf84a6d5bf13f1b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:04 -0800 Subject: perf stat: Do not pass runtime_stat to printout() It always passes a pointer to rt_stat as it's the only one. Let's not pass it and directly refer it in the printout(). Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index dd190f71e933..cdf4ca7f6e3a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -674,8 +674,7 @@ static bool is_mixed_hw_group(struct evsel *counter) } static void printout(struct perf_stat_config *config, struct outstate *os, - double uval, u64 run, u64 ena, double noise, - struct runtime_stat *st, int map_idx) + double uval, u64 run, u64 ena, double noise, int map_idx) { struct perf_stat_output_ctx out; print_metric_t pm; @@ -737,7 +736,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (ok) { perf_stat__print_shadow_stats(config, counter, uval, map_idx, - &out, &config->metric_events, st); + &out, &config->metric_events, &rt_stat); } else { pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); } @@ -834,7 +833,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; - printout(config, &os, uval, run, ena, avg, &rt_stat, s); + printout(config, &os, uval, run, ena, avg, s); if (!metric_only) fputc('\n', output); @@ -987,7 +986,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; - printout(config, &os, uval, run, ena, 1.0, &rt_stat, counter_idx); + printout(config, &os, uval, run, ena, 1.0, counter_idx); } if (!first) print_metric_end(config); -- cgit v1.2.3 From 5f334d88c25e0dbdbc199ad38becc5cc5aa33081 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:05 -0800 Subject: perf stat: Pass through 'struct outstate' Now most of the print functions take a pointer to the struct outstate. We have one in the evlist__print_counters() and pass it through the child functions. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-13-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/iostat.c | 4 +- tools/perf/util/iostat.c | 3 +- tools/perf/util/iostat.h | 4 +- tools/perf/util/stat-display.c | 102 ++++++++++++++++---------------------- 4 files changed, 50 insertions(+), 63 deletions(-) diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index 404de795ec0b..7eb0a7b00b95 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -449,7 +449,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, void iostat_print_counters(struct evlist *evlist, struct perf_stat_config *config, struct timespec *ts, - char *prefix, iostat_print_counter_t print_cnt_cb) + char *prefix, iostat_print_counter_t print_cnt_cb, void *arg) { void *perf_device = NULL; struct evsel *counter = evlist__first(evlist); @@ -464,7 +464,7 @@ void iostat_print_counters(struct evlist *evlist, iostat_prefix(evlist, config, prefix, ts); fprintf(config->output, "\n%s", prefix); } - print_cnt_cb(config, counter, prefix); + print_cnt_cb(config, counter, arg); } fputc('\n', config->output); } diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c index 57dd49da28fe..b770bd473af7 100644 --- a/tools/perf/util/iostat.c +++ b/tools/perf/util/iostat.c @@ -48,6 +48,7 @@ __weak void iostat_print_counters(struct evlist *evlist __maybe_unused, struct perf_stat_config *config __maybe_unused, struct timespec *ts __maybe_unused, char *prefix __maybe_unused, - iostat_print_counter_t print_cnt_cb __maybe_unused) + iostat_print_counter_t print_cnt_cb __maybe_unused, + void *arg __maybe_unused) { } diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h index c22688f87cb2..a4e7299c5c2f 100644 --- a/tools/perf/util/iostat.h +++ b/tools/perf/util/iostat.h @@ -28,7 +28,7 @@ enum iostat_mode_t { extern enum iostat_mode_t iostat_mode; -typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, const char *); +typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, void *); int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config); int iostat_parse(const struct option *opt, const char *str, @@ -42,6 +42,6 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, struct perf_stat_output_ctx *out); void iostat_print_counters(struct evlist *evlist, struct perf_stat_config *config, struct timespec *ts, - char *prefix, iostat_print_counter_t print_cnt_cb); + char *prefix, iostat_print_counter_t print_cnt_cb, void *arg); #endif /* _IOSTAT_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cdf4ca7f6e3a..335627e8542d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -796,7 +796,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - const char *prefix) + struct outstate *os) { FILE *output = config->output; u64 ena, run, val; @@ -806,13 +806,10 @@ static void print_counter_aggrdata(struct perf_stat_config *config, struct aggr_cpu_id id = config->aggr_map->map[s]; double avg = aggr->counts.val; bool metric_only = config->metric_only; - struct outstate os = { - .fh = config->output, - .prefix = prefix ? prefix : "", - .id = id, - .nr = aggr->nr, - .evsel = counter, - }; + + os->id = id; + os->nr = aggr->nr; + os->evsel = counter; if (counter->supported && aggr->nr == 0) return; @@ -824,8 +821,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, run = aggr->counts.run; if (!metric_only) { - if (prefix) - fprintf(output, "%s", prefix); + if (os->prefix) + fprintf(output, "%s", os->prefix); else if (config->summary && config->csv_output && !config->no_csv_summary && !config->interval) fprintf(output, "%s%s", "summary", config->csv_sep); @@ -833,7 +830,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, uval = val * counter->scale; - printout(config, &os, uval, run, ena, avg, s); + printout(config, os, uval, run, ena, avg, s); if (!metric_only) fputc('\n', output); @@ -871,12 +868,9 @@ static void print_metric_end(struct perf_stat_config *config) static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { struct evsel *counter; - struct outstate os = { - .prefix = prefix, - }; int s; if (!config->aggr_map || !config->aggr_get_id) @@ -887,13 +881,13 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, &os, s); + print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); } print_metric_end(config); } @@ -901,34 +895,31 @@ static void print_aggr(struct perf_stat_config *config, static void print_aggr_cgroup(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { struct evsel *counter, *evsel; - struct outstate os = { - .prefix = prefix, - }; int s; if (!config->aggr_map || !config->aggr_get_id) return; evlist__for_each_entry(evlist, evsel) { - if (os.cgrp == evsel->cgrp) + if (os->cgrp == evsel->cgrp) continue; - os.cgrp = evsel->cgrp; + os->cgrp = evsel->cgrp; for (s = 0; s < config->aggr_map->nr; s++) { - print_metric_begin(config, evlist, &os, s); + print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { if (counter->merged_stat) continue; - if (counter->cgrp != os.cgrp) + if (counter->cgrp != os->cgrp) continue; - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); } print_metric_end(config); } @@ -936,7 +927,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, } static void print_counter(struct perf_stat_config *config, - struct evsel *counter, const char *prefix) + struct evsel *counter, struct outstate *os) { int s; @@ -948,19 +939,16 @@ static void print_counter(struct perf_stat_config *config, return; for (s = 0; s < config->aggr_map->nr; s++) { - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); } } static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { int all_idx; struct perf_cpu cpu; - struct outstate os = { - .prefix = prefix, - }; perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) { struct evsel *counter; @@ -975,10 +963,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (counter_idx < 0) continue; - os.evsel = counter; - os.id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + os->evsel = counter; + os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - print_metric_begin(config, evlist, &os, counter_idx); + print_metric_begin(config, evlist, os, counter_idx); first = false; } val = ps->aggr[counter_idx].counts.val; @@ -986,7 +974,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; - printout(config, &os, uval, run, ena, 1.0, counter_idx); + printout(config, os, uval, run, ena, 1.0, counter_idx); } if (!first) print_metric_end(config); @@ -1304,7 +1292,7 @@ static void print_footer(struct perf_stat_config *config) } static void print_percore(struct perf_stat_config *config, - struct evsel *counter, const char *prefix) + struct evsel *counter, struct outstate *os) { bool metric_only = config->metric_only; FILE *output = config->output; @@ -1315,7 +1303,7 @@ static void print_percore(struct perf_stat_config *config, return; if (config->percore_show_thread) - return print_counter(config, counter, prefix); + return print_counter(config, counter, os); core_map = cpu_aggr_map__empty_new(config->aggr_map->nr); if (core_map == NULL) { @@ -1337,7 +1325,7 @@ static void print_percore(struct perf_stat_config *config, if (found) continue; - print_counter_aggrdata(config, counter, s, prefix); + print_counter_aggrdata(config, counter, s, os); core_map->map[c++] = core_id; } @@ -1348,30 +1336,28 @@ static void print_percore(struct perf_stat_config *config, } static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist, - const char *prefix) + struct outstate *os) { struct evsel *counter; - struct outstate os = { - .prefix = prefix, - }; evlist__for_each_entry(evlist, counter) { - if (os.cgrp != counter->cgrp) { - if (os.cgrp != NULL) + if (os->cgrp != counter->cgrp) { + if (os->cgrp != NULL) print_metric_end(config); - os.cgrp = counter->cgrp; - print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); + os->cgrp = counter->cgrp; + print_metric_begin(config, evlist, os, /*aggr_idx=*/0); } - print_counter(config, counter, prefix); + print_counter(config, counter, os); } - if (os.cgrp) + if (os->cgrp) print_metric_end(config); } void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, - struct target *_target, struct timespec *ts, int argc, const char **argv) + struct target *_target, struct timespec *ts, + int argc, const char **argv) { bool metric_only = config->metric_only; int interval = config->interval; @@ -1397,34 +1383,34 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_SOCKET: case AGGR_NODE: if (config->cgroup_list) - print_aggr_cgroup(config, evlist, os.prefix); + print_aggr_cgroup(config, evlist, &os); else - print_aggr(config, evlist, os.prefix); + print_aggr(config, evlist, &os); break; case AGGR_THREAD: case AGGR_GLOBAL: if (config->iostat_run) { iostat_print_counters(evlist, config, ts, buf, - print_counter); + (iostat_print_counter_t)print_counter, &os); } else if (config->cgroup_list) { - print_cgroup_counter(config, evlist, os.prefix); + print_cgroup_counter(config, evlist, &os); } else { print_metric_begin(config, evlist, &os, /*aggr_idx=*/0); evlist__for_each_entry(evlist, counter) { - print_counter(config, counter, os.prefix); + print_counter(config, counter, &os); } print_metric_end(config); } break; case AGGR_NONE: if (metric_only) - print_no_aggr_metric(config, evlist, os.prefix); + print_no_aggr_metric(config, evlist, &os); else { evlist__for_each_entry(evlist, counter) { if (counter->percore) - print_percore(config, counter, os.prefix); + print_percore(config, counter, &os); else - print_counter(config, counter, os.prefix); + print_counter(config, counter, &os); } } break; -- cgit v1.2.3 From ab6baaae273572909f1e957fc27a9459fc95dd8c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:06 -0800 Subject: perf stat: Fix JSON output in metric-only mode It generated a broken JSON output when aggregation mode or cgroup is used with --metric-only option. Also get rid of the header line and make the output single line for each entry. It needs to know whether the current metric is the first one or not. So add 'first' field in the outstate and mark it false after printing. Before: # perf stat -a -j --metric-only true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {{"metric-value" : "0.797"}{"metric-value" : "1.65"}{"metric-value" : "0.89"} ^ # perf stat -a -j --metric-only --per-socket true {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"} {"socket" : "S0", "aggregate-number" : 8, {"metric-value" : "0.295"}{"metric-value" : "1.88"}{"metric-value" : "0.64"} ^ After: # perf stat -a -j --metric-only true {"GHz" : "0.990", "insn per cycle" : "2.06", "branch-misses of all branches" : "0.59"} # perf stat -a -j --metric-only --per-socket true {"socket" : "S0", "aggregate-number" : 8, "GHz" : "0.439", "insn per cycle" : "2.14", "branch-misses of all branches" : "0.51"} Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-14-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 335627e8542d..43640115454c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -279,9 +279,6 @@ static void print_aggr_id_json(struct perf_stat_config *config, { FILE *output = config->output; - if (!config->interval) - fputc('{', output); - switch (config->aggr_mode) { case AGGR_CORE: fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", @@ -335,6 +332,7 @@ static void aggr_printout(struct perf_stat_config *config, struct outstate { FILE *fh; bool newline; + bool first; const char *prefix; int nfields; int nr; @@ -491,6 +489,7 @@ static void print_metric_only(struct perf_stat_config *config, color_snprintf(str, sizeof(str), color ?: "", fmt, val); fprintf(out, "%*s ", mlen, str); + os->first = false; } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, @@ -512,6 +511,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused ends++; *ends = 0; fprintf(out, "%s%s", vals, config->csv_sep); + os->first = false; } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, @@ -532,7 +532,8 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - fprintf(out, "{\"metric-value\" : \"%s\"}", vals); + fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); + os->first = false; } static void new_line_metric(struct perf_stat_config *config __maybe_unused, @@ -561,7 +562,7 @@ static void print_metric_header(struct perf_stat_config *config, unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) - fprintf(os->fh, "{\"unit\" : \"%s\"}", unit); + return; else if (config->csv_output) fprintf(os->fh, "%s%s", unit, config->csv_sep); else @@ -821,6 +822,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, run = aggr->counts.run; if (!metric_only) { + if (config->json_output) + fputc('{', output); if (os->prefix) fprintf(output, "%s", os->prefix); else if (config->summary && config->csv_output && @@ -844,9 +847,12 @@ static void print_metric_begin(struct perf_stat_config *config, struct aggr_cpu_id id; struct evsel *evsel; + os->first = true; if (!config->metric_only) return; + if (config->json_output) + fputc('{', config->output); if (os->prefix) fprintf(config->output, "%s", os->prefix); @@ -855,7 +861,7 @@ static void print_metric_begin(struct perf_stat_config *config, aggr = &evsel->stats->aggr[aggr_idx]; aggr_printout(config, evsel, id, aggr->nr); - print_cgroup(config, os->cgrp); + print_cgroup(config, os->cgrp ? : evsel->cgrp); } static void print_metric_end(struct perf_stat_config *config) @@ -863,6 +869,8 @@ static void print_metric_end(struct perf_stat_config *config) if (!config->metric_only) return; + if (config->json_output) + fputc('}', config->output); fputc('\n', config->output); } @@ -1005,11 +1013,9 @@ static void print_metric_headers_csv(struct perf_stat_config *config, fputs(aggr_header_csv[config->aggr_mode], config->output); } -static void print_metric_headers_json(struct perf_stat_config *config, +static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused, bool no_indent __maybe_unused) { - if (config->interval) - fputs("{\"unit\" : \"sec\"}", config->output); } static void print_metric_headers(struct perf_stat_config *config, @@ -1049,7 +1055,9 @@ static void print_metric_headers(struct perf_stat_config *config, &config->metric_events, &rt_stat); } - fputc('\n', config->output); + + if (!config->json_output) + fputc('\n', config->output); } static void prepare_interval(struct perf_stat_config *config, @@ -1058,17 +1066,14 @@ static void prepare_interval(struct perf_stat_config *config, if (config->iostat_run) return; - if (config->csv_output) + if (config->json_output) + scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ", + (unsigned long) ts->tv_sec, ts->tv_nsec); + else if (config->csv_output) scnprintf(prefix, len, "%lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); - else if (!config->json_output) - scnprintf(prefix, len, "%6lu.%09lu ", - (unsigned long) ts->tv_sec, ts->tv_nsec); - else if (!config->metric_only) - scnprintf(prefix, len, "{\"interval\" : %lu.%09lu, ", - (unsigned long) ts->tv_sec, ts->tv_nsec); else - scnprintf(prefix, len, "{\"interval\" : %lu.%09lu}", + scnprintf(prefix, len, "%6lu.%09lu ", (unsigned long) ts->tv_sec, ts->tv_nsec); } @@ -1365,6 +1370,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf char buf[64]; struct outstate os = { .fh = config->output, + .first = true, }; if (config->iostat_run) -- cgit v1.2.3 From c4b41b83c25073c09bfcc4e5ec496c9dd316656b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:07 -0800 Subject: perf stat: Rename "aggregate-number" to "cpu-count" in JSON As the JSON output has been broken for a little while, I guess there are not many users. Let's rename the field to more intuitive one. :) Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-15-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 43640115454c..7a39a1a7261d 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -281,19 +281,19 @@ static void print_aggr_id_json(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ", id.socket, id.die, id.core, nr); break; case AGGR_DIE: - fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ", id.socket, id.die, nr); break; case AGGR_SOCKET: - fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ", id.socket, nr); break; case AGGR_NODE: - fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ", id.node, nr); break; case AGGR_NONE: -- cgit v1.2.3 From 765d4e497fc51c64b50e5947d0b63bc3ccbd83d3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 23 Nov 2022 10:02:08 -0800 Subject: perf stat: Tidy up JSON metric-only output when no metrics It printed empty strings for each metric. I guess it's needed for CSV output to match the column number. We could just ignore the empty metrics in JSON but it ended up with a broken JSON object with a trailing comma. So I added a dummy '"metric-value" : "none"' part. To do that, it needs to pass struct outstate to print_metric_end() to check if any metric value is printed or not. Before: # perf stat -aj --metric-only --per-socket --for-each-cgroup system.slice true {"socket" : "S0", "cpu-count" : 8, "cgroup" : "system.slice", "" : "", "" : "", "" : "", "" : "", "" : "", "" : "", "" : "", "" : ""} After: # perf stat -aj --metric-only --per-socket --for-each-cgroup system.slice true {"socket" : "S0", "cpu-count" : 8, "cgroup" : "system.slice", "metric-value" : "none"} Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221123180208.2068936-16-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7a39a1a7261d..847acdb5dc40 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -532,6 +532,8 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; + if (!unit[0] || !vals[0]) + return; fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); os->first = false; } @@ -864,14 +866,19 @@ static void print_metric_begin(struct perf_stat_config *config, print_cgroup(config, os->cgrp ? : evsel->cgrp); } -static void print_metric_end(struct perf_stat_config *config) +static void print_metric_end(struct perf_stat_config *config, struct outstate *os) { + FILE *output = config->output; + if (!config->metric_only) return; - if (config->json_output) - fputc('}', config->output); - fputc('\n', config->output); + if (config->json_output) { + if (os->first) + fputs("\"metric-value\" : \"none\"", output); + fputc('}', output); + } + fputc('\n', output); } static void print_aggr(struct perf_stat_config *config, @@ -897,7 +904,7 @@ static void print_aggr(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, os); } - print_metric_end(config); + print_metric_end(config, os); } } @@ -929,7 +936,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config, print_counter_aggrdata(config, counter, s, os); } - print_metric_end(config); + print_metric_end(config, os); } } } @@ -985,7 +992,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, printout(config, os, uval, run, ena, 1.0, counter_idx); } if (!first) - print_metric_end(config); + print_metric_end(config, os); } } @@ -1348,7 +1355,7 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist evlist__for_each_entry(evlist, counter) { if (os->cgrp != counter->cgrp) { if (os->cgrp != NULL) - print_metric_end(config); + print_metric_end(config, os); os->cgrp = counter->cgrp; print_metric_begin(config, evlist, os, /*aggr_idx=*/0); @@ -1357,7 +1364,7 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_counter(config, counter, os); } if (os->cgrp) - print_metric_end(config); + print_metric_end(config, os); } void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, @@ -1405,7 +1412,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf evlist__for_each_entry(evlist, counter) { print_counter(config, counter, &os); } - print_metric_end(config); + print_metric_end(config, &os); } break; case AGGR_NONE: -- cgit v1.2.3 From eafcbb6838b67ae67bcab3f794dff593250a71bb Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:38 +0800 Subject: perf vendor events intel: Add core event list for Alderlake-N Alderlake-N only has E-core, it has been moved to non-hybrid code path on the kernel side, so add the cpuid for Alderlake-N separately. Add core event list for Alderlake-N, it is based on the ADL gracemont v1.16 JSON file. https://github.com/intel/perfmon/tree/main/ADL/events/ Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-1-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/alderlaken/cache.json | 330 +++++++++++++ .../arch/x86/alderlaken/floating-point.json | 18 + .../pmu-events/arch/x86/alderlaken/frontend.json | 26 + .../pmu-events/arch/x86/alderlaken/memory.json | 81 ++++ .../perf/pmu-events/arch/x86/alderlaken/other.json | 38 ++ .../pmu-events/arch/x86/alderlaken/pipeline.json | 533 +++++++++++++++++++++ .../arch/x86/alderlaken/virtual-memory.json | 47 ++ tools/perf/pmu-events/arch/x86/mapfile.csv | 3 +- 8 files changed, 1075 insertions(+), 1 deletion(-) create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/other.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json new file mode 100644 index 000000000000..043445ae14a8 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json @@ -0,0 +1,330 @@ +[ + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x4f" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x38" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD", + "SampleAfterValue": "200003", + "UMask": "0x7" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in DRAM.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ALL", + "SampleAfterValue": "20003", + "UMask": "0x7" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", + "SampleAfterValue": "20003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.RSV", + "SampleAfterValue": "20003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", + "SampleAfterValue": "20003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of load uops retired.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "PEBS": "1", + "PublicDescription": "Counts the total number of load uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x81" + }, + { + "BriefDescription": "Counts the number of store uops retired.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "PEBS": "1", + "PublicDescription": "Counts the total number of store uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x82" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of retired split load uops.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "PEBS": "2", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x8003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json new file mode 100644 index 000000000000..30e8ca3c1485 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.", + "SampleAfterValue": "20003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.FPDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x8" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json new file mode 100644 index 000000000000..36898bab2bba --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json @@ -0,0 +1,26 @@ +[ + { + "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "EventCode": "0xe6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of instruction cache misses.", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "PublicDescription": "Counts the number of missed requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json new file mode 100644 index 000000000000..f84bf8c43495 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json @@ -0,0 +1,81 @@ +[ + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ANY_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xff" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.", + "EventCode": "0x05", + "EventName": "LD_HEAD.L1_BOUND_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xf4" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.", + "EventCode": "0x05", + "EventName": "LD_HEAD.OTHER_AT_RET", + "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.", + "SampleAfterValue": "1000003", + "UMask": "0xc0" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.", + "EventCode": "0x05", + "EventName": "LD_HEAD.PGWALK_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xa0" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ST_ADDR_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x84" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "20003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/other.json b/tools/perf/pmu-events/arch/x86/alderlaken/other.json new file mode 100644 index 000000000000..6336de61f628 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/other.json @@ -0,0 +1,38 @@ +[ + { + "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.COREWB_M.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10008", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10800", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json new file mode 100644 index 000000000000..fa53ff11a509 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json @@ -0,0 +1,533 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "200003" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9" + }, + { + "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xbf" + }, + { + "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of near CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9" + }, + { + "BriefDescription": "Counts the number of near RET branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.REL_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfd" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "200003" + }, + { + "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.", + "SampleAfterValue": "2000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the total number of instructions retired. (Fixed event)", + "EventName": "INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number of instructions retired.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS", + "Deprecated": "1", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.4K_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "SampleAfterValue": "20003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of machines clears due to memory renaming.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MRN_NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "SampleAfterValue": "20003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SLOW", + "SampleAfterValue": "20003", + "UMask": "0x6f" + }, + { + "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "20003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", + "EventCode": "0x75", + "EventName": "SERIALIZATION.NON_C01_MS_SCB", + "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REGISTER", + "SampleAfterValue": "1000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.CISC", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", + "SampleAfterValue": "1000003", + "UMask": "0x8d" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x72" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the total number of consumed retirement slots.", + "EventCode": "0xc2", + "EventName": "TOPDOWN_RETIRING.ALL", + "PEBS": "1", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the total number of uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.ALL", + "PEBS": "1", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.IDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "PEBS": "1", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.X87", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json new file mode 100644 index 000000000000..67fd640f790e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json @@ -0,0 +1,47 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.PDE_CACHE_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.DTLB_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x90" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 5e609b876790..78af105ca236 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,5 +1,6 @@ Family-model,Version,Filename,EventType -GenuineIntel-6-(97|9A|B7|BA|BE|BF),v1.15,alderlake,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.15,alderlake,core +GenuineIntel-6-BE,v1.16,alderlaken,core GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core GenuineIntel-6-(3D|47),v26,broadwell,core GenuineIntel-6-56,v23,broadwellde,core -- cgit v1.2.3 From a6a29bcf596141f95fc0f9756ba68de31ba1f46c Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:39 +0800 Subject: perf vendor events intel: Add uncore event list for Alderlake-N Add JSON uncore events for Alderlake-N Based on JSON list v1.16: https://github.com/intel/perfmon/tree/main/ADL/events/ Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-2-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlaken/uncore-memory.json | 175 +++++++++++++++++++++ .../arch/x86/alderlaken/uncore-other.json | 33 ++++ 2 files changed, 208 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json new file mode 100644 index 000000000000..2ccd9cf96957 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json @@ -0,0 +1,175 @@ +[ + { + "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).", + "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a read request sent to DRAM", + "EventCode": "0x24", + "EventName": "UNC_M_ACT_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command sent to DRAM", + "EventCode": "0x26", + "EventName": "UNC_M_ACT_COUNT_TOTAL", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a write request sent to DRAM", + "EventCode": "0x25", + "EventName": "UNC_M_ACT_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Read CAS command sent to DRAM", + "EventCode": "0x22", + "EventName": "UNC_M_CAS_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Write CAS command sent to DRAM", + "EventCode": "0x23", + "EventName": "UNC_M_CAS_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Number of clocks", + "EventCode": "0x01", + "EventName": "UNC_M_CLOCKTICKS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Empty", + "EventCode": "0x1D", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Empty", + "EventCode": "0x20", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Hit", + "EventCode": "0x1C", + "EventName": "UNC_M_DRAM_PAGE_HIT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Hit", + "EventCode": "0x1F", + "EventName": "UNC_M_DRAM_PAGE_HIT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Miss", + "EventCode": "0x1E", + "EventName": "UNC_M_DRAM_PAGE_MISS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Miss", + "EventCode": "0x21", + "EventName": "UNC_M_DRAM_PAGE_MISS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Hot state", + "EventCode": "0x19", + "EventName": "UNC_M_DRAM_THERMAL_HOT", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Warm state", + "EventCode": "0x1A", + "EventName": "UNC_M_DRAM_THERMAL_WARM", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming read prefetch request from IA.", + "EventCode": "0x0A", + "EventName": "UNC_M_PREFETCH_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", + "EventCode": "0x28", + "EventName": "UNC_M_PRE_COUNT_IDLE", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM for a read/write request", + "EventCode": "0x27", + "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC0 read request", + "EventCode": "0x02", + "EventName": "UNC_M_VC0_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC0 write request", + "EventCode": "0x03", + "EventName": "UNC_M_VC0_REQUESTS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 read request", + "EventCode": "0x04", + "EventName": "UNC_M_VC1_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 write request", + "EventCode": "0x05", + "EventName": "UNC_M_VC1_REQUESTS_WR", + "PerPkg": "1", + "Unit": "iMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json new file mode 100644 index 000000000000..f9e7777cd2be --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json @@ -0,0 +1,33 @@ +[ + { + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "EventCode": "0x84", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.", + "EventCode": "0x80", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "Unit": "CLOCK" + } +] -- cgit v1.2.3 From 2bb3fbad4c3b3b5b6d5ac537b6ab404443fc5224 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:40 +0800 Subject: perf vendor events intel: Add metrics for Alderlake-N Add JSON metrics for Alderlake-N to perf. It only included E-core metrics. E-core metrics based on E-core TMA v2.2 (E-core_TMA_Metrics.csv) It is downloaded from: https://github.com/intel/perfmon/ Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-3-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlaken/adln-metrics.json | 583 +++++++++++++++++++++ 1 file changed, 583 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json new file mode 100644 index 000000000000..c57e9f325fb0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -0,0 +1,583 @@ +[ + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_frontend_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / SLOTS", + "MetricGroup": "TopdownL2;tma_frontend_bound_group", + "MetricName": "tma_frontend_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_icache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_itlb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_branch_detect", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_branch_resteer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / SLOTS", + "MetricGroup": "TopdownL2;tma_frontend_bound_group", + "MetricName": "tma_frontend_bandwidth", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_FE_BOUND.CISC / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_cisc", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_decode", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_predecode", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_other_fb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", + "MetricExpr": "(SLOTS - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_bad_speculation", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / SLOTS", + "MetricGroup": "TopdownL2;tma_bad_speculation_group", + "MetricName": "tma_branch_mispredicts", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / SLOTS", + "MetricGroup": "TopdownL2;tma_bad_speculation_group", + "MetricName": "tma_machine_clears", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / SLOTS", + "MetricGroup": "TopdownL3;tma_machine_clears_group", + "MetricName": "tma_nuke", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_smc", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_memory_ordering", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_fp_assist", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_disambiguation", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_page_fault", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / SLOTS", + "MetricGroup": "TopdownL3;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "TOPDOWN_BE_BOUND.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_backend_bound", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls. ", + "MetricExpr": "max(0, tma_backend_bound - tma_load_store_bound)", + "MetricGroup": "TopdownL2;tma_backend_bound_group", + "MetricName": "tma_core_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads. ", + "MetricExpr": "min((TOPDOWN_BE_BOUND.ALL / SLOTS), (LD_HEAD.ANY_AT_RET / CLKS) + tma_store_bound)", + "MetricGroup": "TopdownL2;tma_backend_bound_group", + "MetricName": "tma_load_store_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.", + "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_store_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", + "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / CLKS", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l1_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", + "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_store_fwd", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", + "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_stlb_hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", + "MetricExpr": "LD_HEAD.PGWALK_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_stlb_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", + "MetricExpr": "LD_HEAD.OTHER_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_other_l1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_L2_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l2_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_LLC_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l3_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_DRAM_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_dram_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.", + "MetricExpr": "max(0, tma_load_store_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_other_load_store", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "tma_backend_bound", + "MetricGroup": "TopdownL1", + "MetricName": "tma_backend_bound_aux", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation. ", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "tma_backend_bound", + "MetricGroup": "TopdownL2;tma_backend_bound_aux_group", + "MetricName": "tma_resource_bound", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. ", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_mem_scheduler", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full", + "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_st_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full", + "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_ld_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative ", + "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_rsv", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_non_mem_scheduler", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", + "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_register", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_reorder_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", + "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_alloc_restriction", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_serialization", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the numer of issue slots that result in retirement slots. ", + "MetricExpr": "TOPDOWN_RETIRING.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_retiring", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops that are not from the microsequencer. ", + "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / SLOTS", + "MetricGroup": "TopdownL2;tma_retiring_group", + "MetricName": "tma_base", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of floating point operations per uop with all default weighting.", + "MetricExpr": "UOPS_RETIRED.FPDIV / SLOTS", + "MetricGroup": "TopdownL3;tma_base_group", + "MetricName": "tma_fp_uops", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.", + "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / SLOTS", + "MetricGroup": "TopdownL3;tma_base_group", + "MetricName": "tma_other_ret", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", + "MetricExpr": "UOPS_RETIRED.MS / SLOTS", + "MetricGroup": "TopdownL2;tma_retiring_group", + "MetricName": "tma_ms_uops", + "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "", + "MetricExpr": "CPU_CLK_UNHALTED.CORE", + "MetricName": "CLKS" + }, + { + "BriefDescription": "", + "MetricExpr": "CPU_CLK_UNHALTED.CORE_P", + "MetricName": "CLKS_P" + }, + { + "BriefDescription": "", + "MetricExpr": "5 * CLKS", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Instructions Per Cycle", + "MetricExpr": "INST_RETIRED.ANY / CLKS", + "MetricName": "IPC" + }, + { + "BriefDescription": "Cycles Per Instruction", + "MetricExpr": "CLKS / INST_RETIRED.ANY", + "MetricName": "CPI" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", + "MetricName": "UPI" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Store_Fwd_Blocks" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block", + "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Address_Alias_Blocks" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Load_Splits" + }, + { + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "IpBranch" + }, + { + "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", + "MetricName": "IpCall" + }, + { + "BriefDescription": "Instructions per Load", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "IpLoad" + }, + { + "BriefDescription": "Instructions per Store", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "IpStore" + }, + { + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "IpMispredict" + }, + { + "BriefDescription": "Instructions per Far Branch", + "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)", + "MetricName": "IpFarBranch" + }, + { + "BriefDescription": "Ratio of all branches which mispredict", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "Branch_Mispredict_Ratio" + }, + { + "BriefDescription": "Ratio between Mispredicted branches and unknown branches", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricName": "Branch_Mispredict_to_Unknown_Branch_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are ucode ops", + "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL", + "MetricName": "Microcode_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL", + "MetricName": "FPDiv_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL", + "MetricName": "IDiv_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL", + "MetricName": "X87_Uop_Ratio" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CLKS / CPU_CLK_UNHALTED.REF_TSC", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Cycle cost per L2 hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", + "MetricName": "Cycles_per_Demand_Load_L2_Hit" + }, + { + "BriefDescription": "Cycle cost per LLC hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", + "MetricName": "Cycles_per_Demand_Load_L3_Hit" + }, + { + "BriefDescription": "Cycle cost per DRAM hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "MetricName": "Cycles_per_Demand_Load_DRAM_Hit" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_L2Hit_Percent" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_L3Hit_Percent" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in DRAM", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_DRAMHit_Percent" + }, + { + "BriefDescription": "load ops retired per 1000 instruction", + "MetricExpr": "1000 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", + "MetricName": "MemLoadPKI" + }, + { + "BriefDescription": "C1 residency percent per core", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "cstate_core@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C8 residency percent per package", + "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C8_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C9 residency percent per package", + "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C9_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C10 residency percent per package", + "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C10_Pkg_Residency", + "ScaleUnit": "100%" + } +] -- cgit v1.2.3 From 4c12f41a14d6c4dd2b4e387eaea249cee68bc01a Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 24 Nov 2022 11:14:41 +0800 Subject: perf vendor events intel: Update events and metrics for alderlake Update JSON events and metrics for alderlake to perf. Based on ADL JSON event list v1.16: https://github.com/intel/perfmon/tree/main/ADL/events Generate the event list and metrics with the converter scripts: https://github.com/intel/perfmon/pull/32 Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221124031441.110134-4-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/alderlake/adl-metrics.json | 73 +- .../perf/pmu-events/arch/x86/alderlake/cache.json | 1391 ++++++-------- .../arch/x86/alderlake/floating-point.json | 91 +- .../pmu-events/arch/x86/alderlake/frontend.json | 224 +-- .../perf/pmu-events/arch/x86/alderlake/memory.json | 214 +-- .../perf/pmu-events/arch/x86/alderlake/other.json | 132 +- .../pmu-events/arch/x86/alderlake/pipeline.json | 1932 +++++++------------- .../arch/x86/alderlake/uncore-memory.json | 185 +- .../arch/x86/alderlake/uncore-other.json | 73 +- .../arch/x86/alderlake/virtual-memory.json | 223 +-- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 11 files changed, 1687 insertions(+), 2853 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index e06d26ad5138..edf440e9359a 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -1287,14 +1287,14 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", "MetricGroup": "HPC;Summary", "MetricName": "CPU_Utilization", "Unit": "cpu_core" }, { "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]", - "MetricExpr": "Turbo_Utilization * msr@tsc@ / 1000000000 / duration_time", + "MetricExpr": "Turbo_Utilization * TSC / 1000000000 / duration_time", "MetricGroup": "Power;Summary", "MetricName": "Average_Frequency", "Unit": "cpu_core" @@ -1337,18 +1337,25 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1000000 / duration_time / 1000", + "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1000000 / duration_time / 1000", "MetricGroup": "HPC;Mem;MemoryBW;SoC", "MetricName": "DRAM_BW_Use", "Unit": "cpu_core" }, { "BriefDescription": "Average number of parallel requests to external memory. Accounts for all requests", - "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / arb@event\\=0x81\\,umask\\=0x1@", + "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL", "MetricGroup": "Mem;SoC", "MetricName": "MEM_Parallel_Requests", "Unit": "cpu_core" }, + { + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricExpr": "UNC_CLOCK.SOCKET", + "MetricGroup": "SoC", + "MetricName": "Socket_CLKS", + "Unit": "cpu_core" + }, { "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", @@ -1356,6 +1363,12 @@ "MetricName": "IpFarBranch", "Unit": "cpu_core" }, + { + "BriefDescription": "Uncore frequency per die [GHZ]", + "MetricExpr": "Socket_CLKS / #num_dies / duration_time / 1000000000", + "MetricGroup": "SoC", + "MetricName": "UNCORE_FREQ" + }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS", @@ -1902,7 +1915,7 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", "MetricName": "CPU_Utilization", "Unit": "cpu_atom" }, @@ -1950,62 +1963,72 @@ }, { "BriefDescription": "C1 residency percent per core", - "MetricExpr": "(cstate_core@c1\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C1_Core_Residency" + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C6 residency percent per core", - "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C6_Core_Residency" + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C7 residency percent per core", - "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_core@c7\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C7_Core_Residency" + "MetricName": "C7_Core_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C2 residency percent per package", - "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C2_Pkg_Residency" + "MetricName": "C2_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C3 residency percent per package", - "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C3_Pkg_Residency" + "MetricName": "C3_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C6 residency percent per package", - "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C6_Pkg_Residency" + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C7 residency percent per package", - "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C7_Pkg_Residency" + "MetricName": "C7_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C8 residency percent per package", - "MetricExpr": "(cstate_pkg@c8\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C8_Pkg_Residency" + "MetricName": "C8_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C9 residency percent per package", - "MetricExpr": "(cstate_pkg@c9\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C9_Pkg_Residency" + "MetricName": "C9_Pkg_Residency", + "ScaleUnit": "100%" }, { "BriefDescription": "C10 residency percent per package", - "MetricExpr": "(cstate_pkg@c10\\-residency@ / msr@tsc@) * 100", + "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC", "MetricGroup": "Power", - "MetricName": "C10_Pkg_Residency" + "MetricName": "C10_Pkg_Residency", + "ScaleUnit": "100%" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json index 2cc62d2779d2..adc9887b8ae0 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json @@ -1,1178 +1,871 @@ [ - { - "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x2e", - "EventName": "LONGEST_LAT_CACHE.MISS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x41", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x2e", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x4f", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.IFETCH", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x38", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.LOAD", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x34", - "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of load uops retired that hit in DRAM.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x04", - "EventName": "MEM_SCHEDULER_BLOCK.ALL", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x04", - "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x04", - "EventName": "MEM_SCHEDULER_BLOCK.RSV", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x04", - "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of load uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x81", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of store uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x82", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x80", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x10", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x100", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x20", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x4", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x200", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x40", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", - "L1_Hit_Indication": "1", - "MSRIndex": "0x3F6", - "MSRValue": "0x8", - "PEBS": "2", - "PEBScounters": "0,1", - "SampleAfterValue": "1000003", - "TakenAlone": "1", - "UMask": "0x5", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of retired split load uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x41", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "Data_LA": "1", - "EventCode": "0xd0", - "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", - "L1_Hit_Indication": "1", - "PEBS": "2", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x6", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_HIT", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3F803C0001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10003C0001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x4003C0001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x8003C0001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.L3_HIT", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3F803C0002", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10003C0002", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.ICACHE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, { "BriefDescription": "L1D.HWPF_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x51", "EventName": "L1D.HWPF_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x51", "EventName": "L1D.REPLACEMENT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x48", "EventName": "L1D_PEND_MISS.FB_FULL", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { - "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0x48", "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "Deprecated": "1", "EventCode": "0x48", "EventName": "L1D_PEND_MISS.L2_STALL", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x48", "EventName": "L1D_PEND_MISS.L2_STALLS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Number of L1D misses that are outstanding", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x48", "EventName": "L1D_PEND_MISS.PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles with L1D load Misses outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x48", "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache lines filling L2", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x25", "EventName": "L2_LINES_IN.ALL", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1f", "Unit": "cpu_core" }, { "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x26", "EventName": "L2_LINES_OUT.USELESS_HWPF", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "All accesses to L2 cache[This event is alias to L2_RQSTS.REFERENCES]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_REQUEST.ALL", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.REFERENCES]", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xff", "Unit": "cpu_core" }, { "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_RQSTS.MISS]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_REQUEST.MISS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.MISS]", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x3f", "Unit": "cpu_core" }, { "BriefDescription": "L2 code requests", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_CODE_RD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the total number of L2 code requests.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xe4", "Unit": "cpu_core" }, { "BriefDescription": "Demand Data Read access L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xe1", "Unit": "cpu_core" }, { "BriefDescription": "Demand requests that miss L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts demand requests that miss L2 cache.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x27", "Unit": "cpu_core" }, { "BriefDescription": "L2_RQSTS.ALL_HWPF", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_HWPF", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xf0", "Unit": "cpu_core" }, { "BriefDescription": "RFO requests to L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.ALL_RFO", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xe2", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.CODE_RD_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xc4", "Unit": "cpu_core" }, { "BriefDescription": "L2 cache misses when fetching instructions", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.CODE_RD_MISS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x24", "Unit": "cpu_core" }, { "BriefDescription": "Demand Data Read requests that hit L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0xc1", "Unit": "cpu_core" }, { "BriefDescription": "Demand Data Read miss L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x21", "Unit": "cpu_core" }, { "BriefDescription": "L2_RQSTS.HWPF_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.HWPF_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x30", "Unit": "cpu_core" }, { "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_REQUEST.MISS]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x24", "EventName": "L2_RQSTS.MISS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.MISS]", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x3f", "Unit": "cpu_core" }, { - "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.REFERENCES", - "PEBScounters": "0,1,2,3", + "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.ALL]", + "SampleAfterValue": "200003", + "UMask": "0xff", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests that hit L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "RFO requests that miss L2 cache", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22", + "Unit": "cpu_core" + }, + { + "BriefDescription": "SW prefetch requests that hit L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.SWPF_HIT", + "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.", + "SampleAfterValue": "200003", + "UMask": "0xc8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "SW prefetch requests that miss L2 cache.", + "EventCode": "0x24", + "EventName": "L2_RQSTS.SWPF_MISS", + "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.", + "SampleAfterValue": "200003", + "UMask": "0x28", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x41", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x4f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x4f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x38", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).", "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xff", - "Unit": "cpu_core" + "UMask": "0x20", + "Unit": "cpu_atom" }, { - "BriefDescription": "RFO requests that hit L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.RFO_HIT", - "PEBScounters": "0,1,2,3", + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.", "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xc2", - "Unit": "cpu_core" + "UMask": "0x8", + "Unit": "cpu_atom" }, { - "BriefDescription": "RFO requests that miss L2 cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.RFO_MISS", - "PEBScounters": "0,1,2,3", + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x22", - "Unit": "cpu_core" + "UMask": "0x10", + "Unit": "cpu_atom" }, { - "BriefDescription": "SW prefetch requests that hit L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.SWPF_HIT", - "PEBScounters": "0,1,2,3", + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD", "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xc8", - "Unit": "cpu_core" + "UMask": "0x7", + "Unit": "cpu_atom" }, { - "BriefDescription": "SW prefetch requests that miss L2 cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x24", - "EventName": "L2_RQSTS.SWPF_MISS", - "PEBScounters": "0,1,2,3", + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT", "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x28", - "Unit": "cpu_core" + "UMask": "0x4", + "Unit": "cpu_atom" }, { - "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2e", - "EventName": "LONGEST_LAT_CACHE.MISS", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x41", - "Unit": "cpu_core" + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_atom" }, { - "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x2e", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", - "UMask": "0x4f", - "Unit": "cpu_core" + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" }, { "BriefDescription": "Retired load instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.ALL_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.", "SampleAfterValue": "1000003", "UMask": "0x81", "Unit": "cpu_core" }, { "BriefDescription": "Retired store instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.ALL_STORES", - "L1_Hit_Indication": "1", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts all retired store instructions.", "SampleAfterValue": "1000003", "UMask": "0x82", "Unit": "cpu_core" }, { "BriefDescription": "All retired memory instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.ANY", - "L1_Hit_Indication": "1", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts all retired memory instructions - loads and stores.", "SampleAfterValue": "1000003", "UMask": "0x83", "Unit": "cpu_core" }, { "BriefDescription": "Retired load instructions with locked access.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.LOCK_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts retired load instructions with locked access.", "SampleAfterValue": "100007", "UMask": "0x21", "Unit": "cpu_core" }, { "BriefDescription": "Retired load instructions that split across a cacheline boundary.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", "UMask": "0x41", "Unit": "cpu_core" }, { "BriefDescription": "Retired store instructions that split across a cacheline boundary.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "L1_Hit_Indication": "1", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", "UMask": "0x42", "Unit": "cpu_core" }, { "BriefDescription": "Retired load instructions that miss the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).", "SampleAfterValue": "100003", "UMask": "0x11", "Unit": "cpu_core" }, { "BriefDescription": "Retired store instructions that miss the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "L1_Hit_Indication": "1", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", "SampleAfterValue": "100003", "UMask": "0x12", "Unit": "cpu_core" }, { "BriefDescription": "Completed demand load uops that miss the L1 d-cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xfd", "Unit": "cpu_core" }, { "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", "SampleAfterValue": "20011", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", "SampleAfterValue": "20011", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", "PEBS": "1", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.", "SampleAfterValue": "20011", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", + "Data_LA": "1", + "EventCode": "0xd3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", + "SampleAfterValue": "100007", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "Data_LA": "1", + "EventCode": "0xd4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).", + "SampleAfterValue": "100007", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "100021", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", + "SampleAfterValue": "100021", "UMask": "0x4", "Unit": "cpu_core" }, { - "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "50021", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in DRAM.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ALL", + "SampleAfterValue": "20003", + "UMask": "0x7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", + "SampleAfterValue": "20003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.RSV", + "SampleAfterValue": "20003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", + "SampleAfterValue": "20003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "MEM_STORE_RETIRED.L2_HIT", + "EventCode": "0x44", + "EventName": "MEM_STORE_RETIRED.L2_HIT", + "SampleAfterValue": "200003", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of load uops retired.", "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100003", - "UMask": "0x8", - "Unit": "cpu_core" + "PublicDescription": "Counts the total number of load uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x81", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of store uops retired.", "Data_LA": "1", - "EventCode": "0xd2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "20011", - "UMask": "0x2", - "Unit": "cpu_core" + "PublicDescription": "Counts the total number of store uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x82", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x1", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd4", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x4", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100007", - "UMask": "0x40", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", - "UMask": "0x1", - "Unit": "cpu_core" + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions missed L1 cache as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "UMask": "0x8", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "UMask": "0x2", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions missed L2 cache as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100021", - "UMask": "0x10", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "100021", - "UMask": "0x4", - "Unit": "cpu_core" + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" }, { - "BriefDescription": "Retired load instructions missed L3 cache as data sources", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "BriefDescription": "Counts the number of retired split load uops.", "Data_LA": "1", - "EventCode": "0xd1", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "PEBS": "1", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "50021", - "UMask": "0x20", - "Unit": "cpu_core" + "SampleAfterValue": "200003", + "UMask": "0x41", + "Unit": "cpu_atom" }, { - "BriefDescription": "MEM_STORE_RETIRED.L2_HIT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "EventCode": "0x44", - "EventName": "MEM_STORE_RETIRED.L2_HIT", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "200003", - "UMask": "0x1", - "Unit": "cpu_core" + "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "PEBS": "2", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" }, { "BriefDescription": "Retired memory uops for any access", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe5", "EventName": "MEM_UOP_RETIRED.ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses", "SampleAfterValue": "1000003", "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", @@ -1181,9 +874,28 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x8003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", @@ -1192,9 +904,28 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", @@ -1205,139 +936,111 @@ }, { "BriefDescription": "OFFCORE_REQUESTS.ALL_REQUESTS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Demand and prefetch data reads", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "OFFCORE_REQUESTS.DATA_RD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Demand Data Read requests sent to uncore", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", + "Deprecated": "1", "Errata": "ADL038", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "Errata": "ADL038", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "PEBScounters": "0,1,2,3", + "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "Errata": "ADL038", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHNTA instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.NTA", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHW instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHW instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHT0 instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.T0", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x40", "EventName": "SW_PREFETCH_ACCESS.T1_T2", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json index 48a4605fc057..3eb7cab9b431 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json @@ -1,165 +1,124 @@ [ - { - "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.FP_ASSIST", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.FPDIV", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x8", - "Unit": "cpu_atom" - }, { "BriefDescription": "ARITH.FPDIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb0", "EventName": "ARITH.FPDIV_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts all microcode FP assists.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.FP", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts all microcode Floating Point assists.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "ASSISTS.SSE_AVX_MIX", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.SSE_AVX_MIX", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "100003", "UMask": "0x2", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.", + "SampleAfterValue": "20003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.FPDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json index da1a7ba0e568..250cd128b674 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json @@ -1,536 +1,416 @@ [ { "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0xe6", "EventName": "BACLEARS.ANY", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_atom" }, - { - "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x80", - "EventName": "ICACHE.ACCESSES", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of instruction cache misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x80", - "EventName": "ICACHE.MISSES", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, { "BriefDescription": "Stalls caused by changing prefix length of the instruction.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x87", "EventName": "DECODE.LCP", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles the Microcode Sequencer is busy.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x87", "EventName": "DECODE.MS_BUSY", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "DSB-to-MITE switch true penalty cycles.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x61", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced DSB miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x1", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x11", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced iTLB true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x14", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x12", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.L2_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x13", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", "MSRIndex": "0x3F7", "MSRValue": "0x600106", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", "MSRIndex": "0x3F7", "MSRValue": "0x608006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", "MSRValue": "0x601006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", "MSRValue": "0x600206", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", "MSRValue": "0x610006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", "MSRValue": "0x100206", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", "MSRValue": "0x602006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", "MSRValue": "0x600406", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", "MSRValue": "0x620006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", "MSRIndex": "0x3F7", "MSRValue": "0x604006", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", "MSRValue": "0x600806", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "FRONTEND_RETIRED.MS_FLOWS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.MS_FLOWS", "MSRIndex": "0x3F7", "MSRValue": "0x8", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", "MSRValue": "0x15", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH", "MSRIndex": "0x3F7", "MSRValue": "0x17", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction cache misses.", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "PublicDescription": "Counts the number of missed requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "ICACHE_DATA.STALLS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x83", "EventName": "ICACHE_TAG.STALLS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.DSB_CYCLES_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles DSB is delivering optimal number of Uops", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "6", "EventCode": "0x79", "EventName": "IDQ.DSB_CYCLES_OK", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x79", "EventName": "IDQ.DSB_UOPS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles MITE is delivering any Uop", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.MITE_CYCLES_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles MITE is delivering optimal number of Uops", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "6", "EventCode": "0x79", "EventName": "IDQ.MITE_CYCLES_OK", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x79", "EventName": "IDQ.MITE_UOPS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x79", "EventName": "IDQ.MS_CYCLES_ANY", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Number of switches from DSB or MITE to the MS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0x79", "EventName": "IDQ.MS_SWITCHES", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Uops delivered to IDQ while MS is busy", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x79", "EventName": "IDQ.MS_UOPS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x9c", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "6", "EventCode": "0x9c", "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0x9c", "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json index f894e4a0212b..7595eb4ab46f 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json @@ -1,339 +1,234 @@ [ + { + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "CounterMask": "6", + "EventCode": "0xa3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.ANY_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xff", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.L1_BOUND_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xf4", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.OTHER_AT_RET", - "PEBScounters": "0,1,2,3,4,5", + "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xc0", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.PGWALK_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xa0", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0x05", "EventName": "LD_HEAD.ST_ADDR_AT_RET", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x84", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", "EventCode": "0xc3", "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "PEBScounters": "0,1,2,3,4,5", "SampleAfterValue": "20003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_atom" }, - { - "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3F84400001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3F84400001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.L3_MISS", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3F84400002", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x3F84400002", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", - "CounterMask": "6", - "EventCode": "0xa3", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", - "PEBScounters": "0,1,2,3", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x6", - "Unit": "cpu_core" - }, { "BriefDescription": "Number of machine clears due to memory ordering conflicts.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc3", "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "2", "EventCode": "0x47", "EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "3", "EventCode": "0x47", "EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x3", "Unit": "cpu_core" }, { "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "5", "EventCode": "0x47", "EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x5", "Unit": "cpu_core" }, { "BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "9", "EventCode": "0x47", "EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x9", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "1009", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "20011", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "503", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "100007", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "100003", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "101", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "2003", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", - "CollectPEBSRecord": "2", - "Counter": "1,2,3,4,5,6,7", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", "PEBS": "2", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", "SampleAfterValue": "50021", - "TakenAlone": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.", - "CollectPEBSRecord": "2", "Data_LA": "1", "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE", "PEBS": "2", + "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", "MSRIndex": "0x1a6,0x1a7", @@ -342,9 +237,28 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_RFO.L3_MISS", "MSRIndex": "0x1a6,0x1a7", @@ -352,5 +266,33 @@ "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Demand Data Read requests who miss L3 cache", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "SampleAfterValue": "100003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json index c49d8ce27310..329c611d7cf7 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/other.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json @@ -1,111 +1,66 @@ [ - { - "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.COREWB_M.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10008", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand data reads that have any type of response.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10001", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10002", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts streaming stores that have any type of response.", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xB7", - "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x10800", - "SampleAfterValue": "100003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, { "BriefDescription": "ASSISTS.HARDWARE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.HARDWARE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "ASSISTS.PAGE_FAULT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.PAGE_FAULT", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "CORE_POWER.LICENSE_1", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x28", "EventName": "CORE_POWER.LICENSE_1", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "CORE_POWER.LICENSE_2", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x28", "EventName": "CORE_POWER.LICENSE_2", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "CORE_POWER.LICENSE_3", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x28", "EventName": "CORE_POWER.LICENSE_3", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "200003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.COREWB_M.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10008", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand data reads that have any type of response.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -116,7 +71,6 @@ }, { "BriefDescription": "Counts demand data reads that were supplied by DRAM.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_DATA_RD.DRAM", "MSRIndex": "0x1a6,0x1a7", @@ -125,9 +79,18 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x2A,0x2B", "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -138,7 +101,16 @@ }, { "BriefDescription": "Counts streaming stores that have any type of response.", - "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10800", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts streaming stores that have any type of response.", "EventCode": "0x2A,0x2B", "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", @@ -149,68 +121,52 @@ }, { "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa5", "EventName": "RS.EMPTY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0xa5", "EventName": "RS.EMPTY_COUNT", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EdgeDetect": "1", "EventCode": "0xa5", "EventName": "RS_EMPTY.COUNT", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", + "Deprecated": "1", "EventCode": "0xa5", "EventName": "RS_EMPTY.CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x7", "Unit": "cpu_core" }, { "BriefDescription": "XQ.FULL_CYCLES", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x2d", "EventName": "XQ.FULL_CYCLES", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json index 1a137f7f8b7e..f46fa7ba168a 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json @@ -1,2168 +1,1634 @@ [ - { - "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf9", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.COND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.COND_TAKEN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xbf", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.INDIRECT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.INDIRECT_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.IND_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf9", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near RET branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.NON_RETURN_IND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.REL_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfd", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.RETURN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc4", - "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.COND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.COND_TAKEN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.INDIRECT", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.IND_CALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0x7e", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xeb", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.RETURN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xf7", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc5", - "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "UMask": "0xfe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.CORE", - "PEBScounters": "33", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x3c", - "EventName": "CPU_CLK_UNHALTED.CORE_P", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "PEBScounters": "34", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x3c", - "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.THREAD", - "PEBScounters": "33", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of unhalted core clock cycles.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x3c", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of instructions retired. (Fixed event)", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 0", - "EventName": "INST_RETIRED.ANY", - "PEBS": "1", - "PEBScounters": "32", - "SampleAfterValue": "2000003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc0", - "EventName": "INST_RETIRED.ANY_P", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x03", - "EventName": "LD_BLOCKS.4K_ALIAS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x03", - "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x03", - "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.DISAMBIGUATION", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machines clears due to memory renaming.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.MRN_NUKE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.PAGE_FAULT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.SLOW", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x6f", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.SMC", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "20003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x75", - "EventName": "SERIALIZATION.NON_C01_MS_SCB", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.ALL", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x3", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x73", - "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.ALL", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.REGISTER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x40", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x74", - "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.ALL", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x2", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x40", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.CISC", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.DECODE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x8d", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x72", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.ITLB", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.OTHER", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x71", - "EventName": "TOPDOWN_FE_BOUND.PREDECODE", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x4", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of consumed retirement slots.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "TOPDOWN_RETIRING.ALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.ALL", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of integer divide uops retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.IDIV", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x10", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.MS", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0xc2", - "EventName": "UOPS_RETIRED.X87", - "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "UMask": "0x2", - "Unit": "cpu_atom" - }, { "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xb0", "EventName": "ARITH.DIVIDER_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x9", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb0", "EventName": "ARITH.DIV_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x9", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xb0", "EventName": "ARITH.FP_DIVIDER_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "This event counts the cycles the integer divider is busy.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb0", "EventName": "ARITH.IDIV_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xb0", "EventName": "ARITH.INT_DIVIDER_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc1", "EventName": "ASSISTS.ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1b", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "200003", + "Unit": "cpu_atom" + }, { "BriefDescription": "All branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts all branch instructions retired.", "SampleAfterValue": "400009", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, { "BriefDescription": "Conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts conditional branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x11", "Unit": "cpu_core" }, { "BriefDescription": "Not taken branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_NTAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts not taken branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Taken conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts taken conditional branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xbf", + "Unit": "cpu_atom" + }, { "BriefDescription": "Far branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts far branch instructions retired.", "SampleAfterValue": "100007", "UMask": "0x40", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, { "BriefDescription": "Indirect near branch instructions retired (excluding returns)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.", "SampleAfterValue": "100003", "UMask": "0x80", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of near CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9", + "Unit": "cpu_atom" + }, { "BriefDescription": "Direct and indirect near call instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts both direct and indirect near call instructions retired.", "SampleAfterValue": "100007", "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of near RET branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, { "BriefDescription": "Return instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts return instructions retired.", "SampleAfterValue": "100007", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Taken branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts taken branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x20", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.REL_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfd", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "200003", + "Unit": "cpu_atom" + }, { "BriefDescription": "All mispredicted branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", "SampleAfterValue": "400009", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, { "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x11", "Unit": "cpu_core" }, { "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_NTAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.", "SampleAfterValue": "400009", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, { "BriefDescription": "number of branch instructions retired that were mispredicted and taken.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.", "SampleAfterValue": "400009", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, { "BriefDescription": "Mispredicted indirect CALL retired.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.", "SampleAfterValue": "400009", "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, { "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.", "SampleAfterValue": "400009", "UMask": "0x20", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, { "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.RET", "PEBS": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", "SampleAfterValue": "100007", "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.C01", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.C02", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.C0_WAIT", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x70", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.", "SampleAfterValue": "25003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "CPU_CLK_UNHALTED.PAUSE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.PAUSE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EdgeDetect": "1", "EventCode": "0xec", "EventName": "CPU_CLK_UNHALTED.PAUSE_INST", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.", + "SampleAfterValue": "2000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, { "BriefDescription": "Reference cycles when the core is not in halt state.", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "PEBScounters": "34", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Reference cycles when the core is not in halt state.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Core cycles when the thread is not in halt state", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", - "PEBScounters": "33", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Thread cycles when thread is not in halt state", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x3c", "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", "SampleAfterValue": "2000003", - "Speculative": "1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "8", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "16", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "12", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0xc", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "5", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x5", "Unit": "cpu_core" }, { "BriefDescription": "Total execution stalls.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "4", "EventCode": "0xa3", "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "5", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x21", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "2", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa6", "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Instruction decoders utilized in a cycle", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x75", "EventName": "INST_DECODED.DECODERS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of instructions retired. (Fixed event)", + "EventName": "INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PEBS": "1", - "PEBScounters": "32", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of instructions retired.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Number of instructions retired. General Counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.ANY_P", "PEBS": "1", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.", "SampleAfterValue": "2000003", "Unit": "cpu_core" }, { "BriefDescription": "INST_RETIRED.MACRO_FUSED", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.MACRO_FUSED", - "PEBScounters": "1,2,3,4,5,6,7", "SampleAfterValue": "2000003", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Retired NOP instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.NOP", - "PEBScounters": "1,2,3,4,5,6,7", + "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions", "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Precise instruction retired with PEBS precise-distribution", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.PREC_DIST", "PEBS": "1", - "PEBScounters": "32", + "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled. Use on Fixed Counter 0.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "INST_RETIRED.REP_ITERATION", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc0", "EventName": "INST_RETIRED.REP_ITERATION", - "PEBScounters": "1,2,3,4,5,6,7", "SampleAfterValue": "2000003", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.RECOVERY_CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", "SampleAfterValue": "500009", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES", "MSRIndex": "0x3F7", "MSRValue": "0x7", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", - "Speculative": "1", - "TakenAlone": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots where uops got dropped", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xad", "EventName": "INT_MISC.UOP_DROPPING", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.128BIT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.128BIT", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x13", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.256BIT", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.256BIT", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0xac", "Unit": "cpu_core" }, { "BriefDescription": "integer ADD, SUB, SAD 128-bit vector instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.ADD_128", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.", "SampleAfterValue": "1000003", "UMask": "0x3", "Unit": "cpu_core" }, { "BriefDescription": "integer ADD, SUB, SAD 256-bit vector instructions.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.ADD_256", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.", "SampleAfterValue": "1000003", "UMask": "0xc", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.MUL_256", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.MUL_256", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.SHUFFLES", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.SHUFFLES", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.VNNI_128", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.VNNI_128", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "INT_VEC_RETIRED.VNNI_256", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe7", "EventName": "INT_VEC_RETIRED.VNNI_256", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x20", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS", + "Deprecated": "1", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.4K_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, { "BriefDescription": "False dependencies in MOB due to partial compare on address.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x03", "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x03", "EventName": "LD_BLOCKS.NO_SR", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x88", "Unit": "cpu_core" }, { "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x03", "EventName": "LD_BLOCKS.STORE_FORWARD", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x82", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xa8", "EventName": "LSD.CYCLES_ACTIVE", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "6", "EventCode": "0xa8", "EventName": "LSD.CYCLES_OK", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { - "BriefDescription": "Number of Uops delivered by the LSD.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0xa8", - "EventName": "LSD.UOPS", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_core" + "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xa8", + "EventName": "LSD.UOPS", + "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.COUNT", + "PublicDescription": "Counts the number of machine clears (nukes) of any type.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "SampleAfterValue": "20003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machines clears due to memory renaming.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MRN_NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "SampleAfterValue": "20003", + "UMask": "0x20", + "Unit": "cpu_atom" }, { - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", - "CounterMask": "1", - "EdgeDetect": "1", + "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", "EventCode": "0xc3", - "EventName": "MACHINE_CLEARS.COUNT", - "PEBScounters": "0,1,2,3,4,5,6,7", - "SampleAfterValue": "100003", - "Speculative": "1", + "EventName": "MACHINE_CLEARS.SLOW", + "SampleAfterValue": "20003", + "UMask": "0x6f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "20003", "UMask": "0x1", - "Unit": "cpu_core" + "Unit": "cpu_atom" }, { "BriefDescription": "Self-modifying code (SMC) detected.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc3", "EventName": "MACHINE_CLEARS.SMC", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "MISC2_RETIRED.LFENCE", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xe0", "EventName": "MISC2_RETIRED.LFENCE", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "400009", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Increments whenever there is an update to the LBR array.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xcc", "EventName": "MISC_RETIRED.LBR_INSERTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", "SampleAfterValue": "100003", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa2", "EventName": "RESOURCE_STALLS.SB", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa2", "EventName": "RESOURCE_STALLS.SCOREBOARD", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", + "EventCode": "0x75", + "EventName": "SERIALIZATION.NON_C01_MS_SCB", + "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa4", "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots wasted due to incorrect speculations.", - "CollectPEBSRecord": "2", "EventCode": "0xa4", "EventName": "TOPDOWN.BAD_SPEC_SLOTS", + "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions", - "CollectPEBSRecord": "2", "EventCode": "0xa4", "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS", + "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of specualtive operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa4", "EventName": "TOPDOWN.MEMORY_BOUND_SLOTS", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "Fixed counter 3", "EventName": "TOPDOWN.SLOTS", - "PEBScounters": "35", + "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xa4", "EventName": "TOPDOWN.SLOTS_P", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.", "SampleAfterValue": "10000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REGISTER", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.CISC", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", + "SampleAfterValue": "1000003", + "UMask": "0x8d", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x72", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the total number of consumed retirement slots.", + "EventCode": "0xc2", + "EventName": "TOPDOWN_RETIRING.ALL", + "PEBS": "1", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "UOPS_DECODED.DEC0_UOPS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x76", "EventName": "UOPS_DECODED.DEC0_UOPS", - "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on port 0", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_0", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution port 0.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on port 1", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution port 1.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 2, 3 and 10", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_2_3_10", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 4 and 9", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_4_9", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 4 and 9", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 5 and 11", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_5_11", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 5 and 11", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on port 6", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_6", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution port 6.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x40", "Unit": "cpu_core" }, { "BriefDescription": "Uops executed on ports 7 and 8", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb2", "EventName": "UOPS_DISPATCHED.PORT_7_8", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x80", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "2", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "3", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "4", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "2", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_2", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "3", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_3", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "4", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.CYCLES_GE_4", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.STALLS", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.STALL_CYCLES", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.THREAD", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, { "BriefDescription": "Counts the number of x87 uops dispatched.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xb1", "EventName": "UOPS_EXECUTED.X87", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of x87 uops executed.", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Uops that RAT issues to RS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xae", "EventName": "UOPS_ISSUED.ANY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", - "Speculative": "1", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the total number of uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.ALL", + "PEBS": "1", + "SampleAfterValue": "2000003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycles with retired uop(s).", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.CYCLES", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts cycles where at least one uop has retired.", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Retired uops except the last uop of each instruction.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.HEAVY", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.IDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "PEBS": "1", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "UOPS_RETIRED.MS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.MS", "MSRIndex": "0x3F7", "MSRValue": "0x8", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "2000003", - "TakenAlone": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Retirement slots used.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.SLOTS", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "Counts the retirement slots used each cycle.", "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Cycles without actually retired uops.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.STALLS", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "Deprecated": "1", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.STALL_CYCLES", "Invert": "1", - "PEBScounters": "0,1,2,3,4,5,6,7", "SampleAfterValue": "1000003", "UMask": "0x2", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.X87", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json index d82d6f62a6fb..2ccd9cf96957 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json @@ -1,221 +1,174 @@ [ { - "BriefDescription": "Number of clocks", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x01", - "EventName": "UNC_M_CLOCKTICKS", + "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", "PerPkg": "1", + "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC0 read request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x02", - "EventName": "UNC_M_VC0_REQUESTS_RD", + "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC0 write request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x03", - "EventName": "UNC_M_VC0_REQUESTS_WR", + "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).", + "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", "PerPkg": "1", + "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC1 read request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x04", - "EventName": "UNC_M_VC1_REQUESTS_RD", + "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming VC1 write request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x05", - "EventName": "UNC_M_VC1_REQUESTS_WR", + "BriefDescription": "ACT command for a read request sent to DRAM", + "EventCode": "0x24", + "EventName": "UNC_M_ACT_COUNT_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Incoming read prefetch request from IA", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x0A", - "EventName": "UNC_M_PREFETCH_RD", + "BriefDescription": "ACT command sent to DRAM", + "EventCode": "0x26", + "EventName": "UNC_M_ACT_COUNT_TOTAL", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Any Rank at Hot state", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x19", - "EventName": "UNC_M_DRAM_THERMAL_HOT", + "BriefDescription": "ACT command for a write request sent to DRAM", + "EventCode": "0x25", + "EventName": "UNC_M_ACT_COUNT_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Any Rank at Warm state", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1A", - "EventName": "UNC_M_DRAM_THERMAL_WARM", + "BriefDescription": "Read CAS command sent to DRAM", + "EventCode": "0x22", + "EventName": "UNC_M_CAS_COUNT_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "incoming read request page status is Page Hit", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1C", - "EventName": "UNC_M_DRAM_PAGE_HIT_RD", + "BriefDescription": "Write CAS command sent to DRAM", + "EventCode": "0x23", + "EventName": "UNC_M_CAS_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Number of clocks", + "EventCode": "0x01", + "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { "BriefDescription": "incoming read request page status is Page Empty", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x1D", "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "incoming read request page status is Page Miss", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x1E", - "EventName": "UNC_M_DRAM_PAGE_MISS_RD", + "BriefDescription": "incoming write request page status is Page Empty", + "EventCode": "0x20", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Hit", + "EventCode": "0x1C", + "EventName": "UNC_M_DRAM_PAGE_HIT_RD", "PerPkg": "1", "Unit": "iMC" }, { "BriefDescription": "incoming write request page status is Page Hit", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x1F", "EventName": "UNC_M_DRAM_PAGE_HIT_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "incoming write request page status is Page Empty", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x20", - "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", + "BriefDescription": "incoming read request page status is Page Miss", + "EventCode": "0x1E", + "EventName": "UNC_M_DRAM_PAGE_MISS_RD", "PerPkg": "1", "Unit": "iMC" }, { "BriefDescription": "incoming write request page status is Page Miss", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x21", "EventName": "UNC_M_DRAM_PAGE_MISS_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Read CAS command sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x22", - "EventName": "UNC_M_CAS_COUNT_RD", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Write CAS command sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x23", - "EventName": "UNC_M_CAS_COUNT_WR", + "BriefDescription": "Any Rank at Hot state", + "EventCode": "0x19", + "EventName": "UNC_M_DRAM_THERMAL_HOT", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "ACT command for a read request sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x24", - "EventName": "UNC_M_ACT_COUNT_RD", + "BriefDescription": "Any Rank at Warm state", + "EventCode": "0x1A", + "EventName": "UNC_M_DRAM_THERMAL_WARM", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "ACT command for a write request sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x25", - "EventName": "UNC_M_ACT_COUNT_WR", + "BriefDescription": "Incoming read prefetch request from IA.", + "EventCode": "0x0A", + "EventName": "UNC_M_PREFETCH_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "ACT command sent to DRAM", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x26", - "EventName": "UNC_M_ACT_COUNT_TOTAL", + "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", + "EventCode": "0x28", + "EventName": "UNC_M_PRE_COUNT_IDLE", "PerPkg": "1", "Unit": "iMC" }, { "BriefDescription": "PRE command sent to DRAM for a read/write request", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", "EventCode": "0x27", "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", - "Counter": "0,1,2,3,4", - "CounterType": "PGMABLE", - "EventCode": "0x28", - "EventName": "UNC_M_PRE_COUNT_IDLE", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels)", - "CounterType": "FREERUN", - "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC0 read request", + "EventCode": "0x02", + "EventName": "UNC_M_VC0_REQUESTS_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels)", - "Counter": "3", - "CounterType": "FREERUN", - "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC0 write request", + "EventCode": "0x03", + "EventName": "UNC_M_VC0_REQUESTS_WR", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM", - "Counter": "1", - "CounterType": "FREERUN", - "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC1 read request", + "EventCode": "0x04", + "EventName": "UNC_M_VC1_REQUESTS_RD", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM", - "Counter": "4", - "CounterType": "FREERUN", - "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", + "BriefDescription": "Incoming VC1 write request", + "EventCode": "0x05", + "EventName": "UNC_M_VC1_REQUESTS_WR", "PerPkg": "1", "Unit": "iMC" } diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json index b1ae349f5f21..bc5fb6b76065 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json @@ -1,40 +1,73 @@ [ { - "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", - "Counter": "Fixed", - "CounterType": "PGMABLE", - "EventCode": "0xff", - "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "EventCode": "0x84", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", "PerPkg": "1", - "Unit": "CLOCK" + "UMask": "0x1", + "Unit": "ARB" }, { - "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC", - "Counter": "0,1", - "CounterType": "PGMABLE", + "BriefDescription": "Each cycle counts number of any coherent request at memory controller that were issued by any core.", + "EventCode": "0x85", + "EventName": "UNC_ARB_DAT_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of coherent reads pending on data return from memory controller that were issued by any core.", + "EventCode": "0x85", + "EventName": "UNC_ARB_DAT_OCCUPANCY.RD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Number of coherent read requests sent to memory controller that were issued by any core.", "EventCode": "0x81", - "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "EventName": "UNC_ARB_DAT_REQUESTS.RD", "PerPkg": "1", - "UMask": "0x01", + "UMask": "0x2", "Unit": "ARB" }, { - "BriefDescription": "Number of requests allocated in Coherency Tracker", - "Counter": "0,1", - "CounterType": "PGMABLE", - "EventCode": "0x84", - "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_OCCUPANCY.ALL", + "EventCode": "0x85", + "EventName": "UNC_ARB_IFA_OCCUPANCY.ALL", "PerPkg": "1", - "UMask": "0x01", + "UMask": "0x1", "Unit": "ARB" }, { - "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic", - "CounterType": "PGMABLE", + "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_REQUESTS.RD", + "EventCode": "0x81", + "EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD", + "PerPkg": "1", + "UMask": "0x2", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.", "EventCode": "0x80", "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", "PerPkg": "1", - "UMask": "0x01", + "UMask": "0x1", "Unit": "ARB" + }, + { + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "Unit": "CLOCK" } ] diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json index 12baf768ad8d..3827d292da80 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json @@ -1,317 +1,236 @@ [ - { - "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x08", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x49", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0xe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x85", - "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x1", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x85", - "EventName": "ITLB_MISSES.PDE_CACHE_MISS", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "2000003", - "Speculative": "1", - "UMask": "0x80", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x85", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "200003", - "Speculative": "1", - "UMask": "0xe", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3,4,5", - "EventCode": "0x05", - "EventName": "LD_HEAD.DTLB_MISS_AT_RET", - "PEBScounters": "0,1,2,3,4,5", - "SampleAfterValue": "1000003", - "Speculative": "1", - "UMask": "0x90", - "Unit": "cpu_atom" - }, { "BriefDescription": "Loads that miss the DTLB and hit the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0xe", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data load to a 1G page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data load to a 4K page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x12", "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, { "BriefDescription": "Stores that miss the DTLB and hit the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0xe", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data store to a 1G page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x8", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Page walks completed due to a demand data store to a 4K page.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x13", "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.PDE_CACHE_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, { "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.STLB_HIT", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x20", "Unit": "cpu_core" }, { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_ACTIVE", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0xe", "Unit": "cpu_core" }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x4", "Unit": "cpu_core" }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x2", "Unit": "cpu_core" }, { "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.", - "CollectPEBSRecord": "2", - "Counter": "0,1,2,3", "EventCode": "0x11", "EventName": "ITLB_MISSES.WALK_PENDING", - "PEBScounters": "0,1,2,3", + "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.", "SampleAfterValue": "100003", - "Speculative": "1", "UMask": "0x10", "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.DTLB_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x90", + "Unit": "cpu_atom" } ] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 78af105ca236..df47462a125f 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,5 +1,5 @@ Family-model,Version,Filename,EventType -GenuineIntel-6-(97|9A|B7|BA|BF),v1.15,alderlake,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.16,alderlake,core GenuineIntel-6-BE,v1.16,alderlaken,core GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core GenuineIntel-6-(3D|47),v26,broadwell,core -- cgit v1.2.3 From 6f520ce17920b3cdfbd2479b3ccf27f9706219d0 Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Wed, 23 Nov 2022 15:48:16 +0530 Subject: perf symbol: correction while adjusting symbol perf doesn't provide proper symbol information for specially crafted .debug files. Sometimes .debug file may not have similar program header as runtime ELF file. For example if we generate .debug file using objcopy --only-keep-debug resulting file will not contain .text, .data and other runtime sections. That means corresponding program headers will have zero FileSiz and modified Offset. Example: program header of text section of libxxx.so: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x00000000003d3000 0x00000000003d3000 0x00000000003d3000 0x000000000055ae80 0x000000000055ae80 R E 0x1000 Same program header after executing: objcopy --only-keep-debug libxxx.so libxxx.so.debug LOAD 0x0000000000001000 0x00000000003d3000 0x00000000003d3000 0x0000000000000000 0x000000000055ae80 R E 0x1000 Offset and FileSiz have been changed. Following formula will not provide correct value, if program header taken from .debug file (syms_ss): sym.st_value -= phdr.p_vaddr - phdr.p_offset; Correct program header information is located inside runtime ELF file (runtime_ss). Fixes: 2d86612aacb7805f ("perf symbol: Correct address for bss symbols") Signed-off-by: Ajay Kaher Cc: Alexander Shishkin Cc: Alexey Makhalov Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Srivatsa S. Bhat Cc: Steven Rostedt (VMware) Cc: Vasavi Sirnapalli Link: http://lore.kernel.org/lkml/1669198696-50547-1-git-send-email-akaher@vmware.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 647b7dff8ef3..80345695b136 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1303,7 +1303,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, (!used_opd && syms_ss->adjust_symbols)) { GElf_Phdr phdr; - if (elf_read_program_header(syms_ss->elf, + if (elf_read_program_header(runtime_ss->elf, (u64)sym.st_value, &phdr)) { pr_debug4("%s: failed to find program header for " "symbol: %s st_value: %#" PRIx64 "\n", -- cgit v1.2.3 From 1a9c20b45d193ead21dc63b07d1abb40b0a237c2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 18:46:05 -0800 Subject: perf list: Support newlines in wordwrap Rather than a newline starting from column 0, record a newline was seen and then add the newline and space before the next word. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221118024607.409083-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 84fa2d050eac..f3750331e8f6 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -74,17 +74,19 @@ static void wordwrap(const char *s, int start, int max, int corr) { int column = start; int n; + bool saw_newline = false; while (*s) { - int wlen = strcspn(s, " \t"); + int wlen = strcspn(s, " \t\n"); - if (column + wlen >= max && column > start) { + if ((column + wlen >= max && column > start) || saw_newline) { printf("\n%*s", start, ""); column = start + corr; } n = printf("%s%.*s", column > start ? " " : "", wlen, s); if (n <= 0) break; + saw_newline = s[wlen] == '\n'; s += wlen; column += n; s = skip_spaces(s); @@ -146,7 +148,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi wordwrap(desc, 8, pager_get_columns(), 0); printf("]\n"); } - + long_desc = long_desc ?: desc; if (long_desc && print_state->long_desc) { printf("%*s", 8, "["); wordwrap(long_desc, 8, pager_get_columns(), 0); @@ -154,7 +156,8 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi } if (print_state->detailed && encoding_desc) { - printf("%*s%s", 8, "", encoding_desc); + printf("%*s", 8, ""); + wordwrap(encoding_desc, 8, pager_get_columns(), 0); if (metric_name) printf(" MetricName: %s", metric_name); if (metric_expr) -- cgit v1.2.3 From 1284ded7d05952f2657f5abeeda5a3f74ca9cffc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 18:46:06 -0800 Subject: perf list: JSON escape encoding improvements Use strbuf to make the string under construction's length unlimited. Use the format %s to mean a literal string copy and %S to signify a need to escape the string. Add supported for escaping a newline character. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221118024607.409083-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 109 ++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index f3750331e8f6..137d73edb541 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -17,6 +17,7 @@ #include "util/metricgroup.h" #include "util/string2.h" #include "util/strlist.h" +#include "util/strbuf.h" #include #include #include @@ -250,45 +251,56 @@ static void json_print_end(void *ps) printf("%s]\n", print_state->need_sep ? "\n" : ""); } -static void fix_escape_printf(const char *fmt, ...) +static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) { va_list args; - char buf[2048]; - size_t buf_pos = 0; va_start(args, fmt); + strbuf_setlen(buf, 0); for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) { switch (fmt[fmt_pos]) { - case '%': { - const char *s = va_arg(args, const char*); - + case '%': fmt_pos++; - assert(fmt[fmt_pos] == 's'); - for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) { - switch (s[s_pos]) { - case '\\': - __fallthrough; - case '\"': - buf[buf_pos++] = '\\'; - assert(buf_pos < sizeof(buf)); - __fallthrough; - default: - buf[buf_pos++] = s[s_pos]; - assert(buf_pos < sizeof(buf)); - break; + switch (fmt[fmt_pos]) { + case 's': { + const char *s = va_arg(args, const char*); + + strbuf_addstr(buf, s); + break; + } + case 'S': { + const char *s = va_arg(args, const char*); + + for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) { + switch (s[s_pos]) { + case '\n': + strbuf_addstr(buf, "\\n"); + break; + case '\\': + __fallthrough; + case '\"': + strbuf_addch(buf, '\\'); + __fallthrough; + default: + strbuf_addch(buf, s[s_pos]); + break; + } } + break; + } + default: + pr_err("Unexpected format character '%c'\n", fmt[fmt_pos]); + strbuf_addch(buf, '%'); + strbuf_addch(buf, fmt[fmt_pos]); } break; - } default: - buf[buf_pos++] = fmt[fmt_pos]; - assert(buf_pos < sizeof(buf)); + strbuf_addch(buf, fmt[fmt_pos]); break; } } va_end(args); - buf[buf_pos] = '\0'; - fputs(buf, stdout); + fputs(buf->buf, stdout); } static void json_print_event(void *ps, const char *pmu_name, const char *topic, @@ -301,62 +313,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic, { struct json_print_state *print_state = ps; bool need_sep = false; + struct strbuf buf; + strbuf_init(&buf, 0); printf("%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (pmu_name) { - fix_escape_printf("\t\"Unit\": \"%s\"", pmu_name); + fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name); need_sep = true; } if (topic) { - fix_escape_printf("%s\t\"Topic\": \"%s\"", need_sep ? ",\n" : "", topic); + fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic); need_sep = true; } if (event_name) { - fix_escape_printf("%s\t\"EventName\": \"%s\"", need_sep ? ",\n" : "", event_name); + fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "", + event_name); need_sep = true; } if (event_alias && strlen(event_alias)) { - fix_escape_printf("%s\t\"EventAlias\": \"%s\"", need_sep ? ",\n" : "", event_alias); + fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "", + event_alias); need_sep = true; } if (scale_unit && strlen(scale_unit)) { - fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", scale_unit); need_sep = true; } if (event_type_desc) { - fix_escape_printf("%s\t\"EventType\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "", event_type_desc); need_sep = true; } if (deprecated) { - fix_escape_printf("%s\t\"Deprecated\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "", deprecated ? "1" : "0"); need_sep = true; } if (desc) { - fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", long_desc); need_sep = true; } if (encoding_desc) { - fix_escape_printf("%s\t\"Encoding\": \"%s\"", need_sep ? ",\n" : "", encoding_desc); + fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "", + encoding_desc); need_sep = true; } if (metric_name) { - fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", metric_name); + fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", + metric_name); need_sep = true; } if (metric_expr) { - fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", metric_expr); + fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", + metric_expr); need_sep = true; } printf("%s}", need_sep ? "\n" : ""); + strbuf_release(&buf); } static void json_print_metric(void *ps __maybe_unused, const char *group, @@ -366,35 +387,39 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, { struct json_print_state *print_state = ps; bool need_sep = false; + struct strbuf buf; + strbuf_init(&buf, 0); printf("%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (group) { - fix_escape_printf("\t\"MetricGroup\": \"%s\"", group); + fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group); need_sep = true; } if (name) { - fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", name); + fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name); need_sep = true; } if (expr) { - fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", expr); + fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr); need_sep = true; } if (unit) { - fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", unit); + fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit); need_sep = true; } if (desc) { - fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc); + fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "", + fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", long_desc); need_sep = true; } printf("%s}", need_sep ? "\n" : ""); + strbuf_release(&buf); } int cmd_list(int argc, const char **argv) -- cgit v1.2.3 From be3392b65f2b989badcadf5f6a353c7924d3ccf4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 17 Nov 2022 18:46:07 -0800 Subject: perf list: List callback support for libpfm Missed previously, add libpfm support for 'perf list' callbacks and thereby JSON support. Committer notes: Add __maybe_unused to the args of the new print_libpfm_events() in the else HAVE_LIBPFM block. Fixes: e42b0ee61282a2f9 ("perf list: Add JSON output option") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Weilin Wang Cc: Xin Gao Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221118024607.409083-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pfm.c | 154 ++++++++++++++++++++++---------------------------- tools/perf/util/pfm.h | 7 ++- 2 files changed, 71 insertions(+), 90 deletions(-) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index f0bcfcab1a93..ac3227ba769c 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -12,6 +12,7 @@ #include "util/parse-events.h" #include "util/pmu.h" #include "util/pfm.h" +#include "util/strbuf.h" #include #include @@ -130,53 +131,36 @@ static const char *srcs[PFM_ATTR_CTRL_MAX] = { }; static void -print_attr_flags(pfm_event_attr_info_t *info) +print_attr_flags(struct strbuf *buf, const pfm_event_attr_info_t *info) { - int n = 0; + if (info->is_dfl) + strbuf_addf(buf, "[default] "); - if (info->is_dfl) { - printf("[default] "); - n++; - } - - if (info->is_precise) { - printf("[precise] "); - n++; - } - - if (!n) - printf("- "); + if (info->is_precise) + strbuf_addf(buf, "[precise] "); } static void -print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc) +print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, + const pfm_pmu_info_t *pinfo, const pfm_event_info_t *info, + struct strbuf *buf) { - pfm_event_attr_info_t ainfo; - const char *src; int j, ret; + char topic[80], name[80]; - ainfo.size = sizeof(ainfo); + strbuf_setlen(buf, 0); + snprintf(topic, sizeof(topic), "pfm %s", pinfo->name); - printf(" %s\n", info->name); - printf(" [%s]\n", info->desc); - if (long_desc) { - if (info->equiv) - printf(" Equiv: %s\n", info->equiv); + snprintf(name, sizeof(name), "%s::%s", pinfo->name, info->name); + strbuf_addf(buf, "Code: 0x%"PRIx64"\n", info->code); - printf(" Code : 0x%"PRIx64"\n", info->code); - } pfm_for_each_event_attr(j, info) { - ret = pfm_get_event_attr_info(info->idx, j, - PFM_OS_PERF_EVENT_EXT, &ainfo); - if (ret != PFM_SUCCESS) - continue; - - if (ainfo.type == PFM_ATTR_UMASK) { - printf(" %s:%s\n", info->name, ainfo.name); - printf(" [%s]\n", ainfo.desc); - } + pfm_event_attr_info_t ainfo; + const char *src; - if (!long_desc) + ainfo.size = sizeof(ainfo); + ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo); + if (ret != PFM_SUCCESS) continue; if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) @@ -184,64 +168,74 @@ print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc) src = srcs[ainfo.ctrl]; switch (ainfo.type) { - case PFM_ATTR_UMASK: - printf(" Umask : 0x%02"PRIx64" : %s: ", - ainfo.code, src); - print_attr_flags(&ainfo); - putchar('\n'); + case PFM_ATTR_UMASK: /* Ignore for now */ break; case PFM_ATTR_MOD_BOOL: - printf(" Modif : %s: [%s] : %s (boolean)\n", src, - ainfo.name, ainfo.desc); + strbuf_addf(buf, " Modif: %s: [%s] : %s (boolean)\n", src, + ainfo.name, ainfo.desc); break; case PFM_ATTR_MOD_INTEGER: - printf(" Modif : %s: [%s] : %s (integer)\n", src, - ainfo.name, ainfo.desc); + strbuf_addf(buf, " Modif: %s: [%s] : %s (integer)\n", src, + ainfo.name, ainfo.desc); break; case PFM_ATTR_NONE: case PFM_ATTR_RAW_UMASK: case PFM_ATTR_MAX: default: - printf(" Attr : %s: [%s] : %s\n", src, - ainfo.name, ainfo.desc); + strbuf_addf(buf, " Attr: %s: [%s] : %s\n", src, + ainfo.name, ainfo.desc); } } -} + print_cb->print_event(print_state, + pinfo->name, + topic, + name, info->equiv, + /*scale_unit=*/NULL, + /*deprecated=*/NULL, "PFM event", + info->desc, /*long_desc=*/NULL, + /*encoding_desc=*/buf->buf, + /*metric_name=*/NULL, /*metric_expr=*/NULL); -/* - * list all pmu::event:umask, pmu::event - * printed events may not be all valid combinations of umask for an event - */ -static void -print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info) -{ - pfm_event_attr_info_t ainfo; - int j, ret; - bool has_umask = false; + pfm_for_each_event_attr(j, info) { + pfm_event_attr_info_t ainfo; + const char *src; - ainfo.size = sizeof(ainfo); + strbuf_setlen(buf, 0); - pfm_for_each_event_attr(j, info) { - ret = pfm_get_event_attr_info(info->idx, j, - PFM_OS_PERF_EVENT_EXT, &ainfo); + ainfo.size = sizeof(ainfo); + ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo); if (ret != PFM_SUCCESS) continue; - if (ainfo.type != PFM_ATTR_UMASK) - continue; + if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) + ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN; - printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name); - has_umask = true; + src = srcs[ainfo.ctrl]; + if (ainfo.type == PFM_ATTR_UMASK) { + strbuf_addf(buf, "Umask: 0x%02"PRIx64" : %s: ", + ainfo.code, src); + print_attr_flags(buf, &ainfo); + snprintf(name, sizeof(name), "%s::%s:%s", + pinfo->name, info->name, ainfo.name); + print_cb->print_event(print_state, + pinfo->name, + topic, + name, /*alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/NULL, "PFM event", + ainfo.desc, /*long_desc=*/NULL, + /*encoding_desc=*/buf->buf, + /*metric_name=*/NULL, /*metric_expr=*/NULL); + } } - if (!has_umask) - printf("%s::%s\n", pinfo->name, info->name); } -void print_libpfm_events(bool name_only, bool long_desc) +void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state) { pfm_event_info_t info; pfm_pmu_info_t pinfo; - int i, p, ret; + int p, ret; + struct strbuf storage; libpfm_initialize(); @@ -249,12 +243,9 @@ void print_libpfm_events(bool name_only, bool long_desc) info.size = sizeof(info); pinfo.size = sizeof(pinfo); - if (!name_only) - puts("\nList of pre-defined events (to be used in --pfm-events):\n"); + strbuf_init(&storage, 2048); pfm_for_all_pmus(p) { - bool printed_pmu = false; - ret = pfm_get_pmu_info(p, &pinfo); if (ret != PFM_SUCCESS) continue; @@ -267,25 +258,14 @@ void print_libpfm_events(bool name_only, bool long_desc) if (pinfo.pmu == PFM_PMU_PERF_EVENT) continue; - for (i = pinfo.first_event; i != -1; - i = pfm_get_event_next(i)) { - + for (int i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) { ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT, &info); if (ret != PFM_SUCCESS) continue; - if (!name_only && !printed_pmu) { - printf("%s:\n", pinfo.name); - printed_pmu = true; - } - - if (!name_only) - print_libpfm_events_detailed(&info, long_desc); - else - print_libpfm_events_raw(&pinfo, &info); + print_libpfm_event(print_cb, print_state, &pinfo, &info, &storage); } - if (!name_only && printed_pmu) - putchar('\n'); } + strbuf_release(&storage); } diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h index 7d70dda87012..fb25c2749d26 100644 --- a/tools/perf/util/pfm.h +++ b/tools/perf/util/pfm.h @@ -7,13 +7,14 @@ #ifndef __PERF_PFM_H #define __PERF_PFM_H +#include "print-events.h" #include #ifdef HAVE_LIBPFM int parse_libpfm_events_option(const struct option *opt, const char *str, int unset); -void print_libpfm_events(bool name_only, bool long_desc); +void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state); #else #include @@ -26,8 +27,8 @@ static inline int parse_libpfm_events_option( return 0; } -static inline void print_libpfm_events(bool name_only __maybe_unused, - bool long_desc __maybe_unused) +static inline void print_libpfm_events(const struct print_callbacks *print_cb __maybe_unused, + void *print_state __maybe_unused) { } -- cgit v1.2.3 From 49bd97c28b7e7f014a72821fd95fcf11e11599a4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 19 Nov 2022 01:34:46 +0000 Subject: perf tools: Use dedicated non-atomic clear/set bit helpers Use the dedicated non-atomic helpers for {clear,set}_bit() and their test variants, i.e. the double-underscore versions. Depsite being defined in atomic.h, and despite the kernel versions being atomic in the kernel, tools' {clear,set}_bit() helpers aren't actually atomic. Move to the double-underscore versions so that the versions that are expected to be atomic (for kernel developers) can be made atomic without affecting users that don't want atomic operations. No functional change intended. Signed-off-by: Sean Christopherson Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: James Morse Cc: Jiri Olsa Cc: Marc Zyngier Cc: Mark Rutland Cc: Oliver Upton Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sean Christopherson Cc: Suzuki Poulouse Cc: Yury Norov Cc: alexandru elisei Cc: kvm@vger.kernel.org Cc: kvmarm@lists.cs.columbia.edu Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20221119013450.2643007-6-seanjc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/find-bit-bench.c | 2 +- tools/perf/builtin-c2c.c | 6 +++--- tools/perf/builtin-kwork.c | 6 +++--- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-sched.c | 2 +- tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/mem2node.c | 2 +- tools/perf/util/affinity.c | 4 ++-- tools/perf/util/header.c | 8 ++++---- tools/perf/util/mmap.c | 6 +++--- tools/perf/util/pmu.c | 2 +- tools/perf/util/scripting-engines/trace-event-perl.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/session.c | 2 +- tools/perf/util/svghelper.c | 2 +- 15 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index 22b5cfe97023..d103c3136983 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -70,7 +70,7 @@ static int do_for_each_set_bit(unsigned int num_bits) bitmap_zero(to_test, num_bits); skip = num_bits / set_bits; for (i = 0; i < num_bits; i += skip) - set_bit(i, to_test); + __set_bit(i, to_test); for (i = 0; i < outer_iterations; i++) { old = accumulator; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a9190458d2d5..52d94c7dd836 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -230,7 +230,7 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, "WARNING: no sample cpu value")) return; - set_bit(sample->cpu, c2c_he->cpuset); + __set_bit(sample->cpu, c2c_he->cpuset); } static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, @@ -247,7 +247,7 @@ static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, if (WARN_ONCE(node < 0, "WARNING: failed to find node\n")) return; - set_bit(node, c2c_he->nodeset); + __set_bit(node, c2c_he->nodeset); if (c2c_he->paddr != sample->phys_addr) { c2c_he->paddr_cnt++; @@ -2318,7 +2318,7 @@ static int setup_nodes(struct perf_session *session) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { - set_bit(cpu.cpu, set); + __set_bit(cpu.cpu, set); if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) return -EINVAL; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index 4ffbf5908070..0e02b8098644 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -222,7 +222,7 @@ static struct kwork_atom *atom_new(struct perf_kwork *kwork, list_add_tail(&page->list, &kwork->atom_page_list); found_atom: - set_bit(i, page->bitmap); + __set_bit(i, page->bitmap); atom->time = sample->time; atom->prev = NULL; atom->page_addr = page; @@ -235,8 +235,8 @@ static void atom_free(struct kwork_atom *atom) if (atom->prev != NULL) atom_free(atom->prev); - clear_bit(atom->bit_inpage, - ((struct kwork_atom_page *)atom->page_addr)->bitmap); + __clear_bit(atom->bit_inpage, + ((struct kwork_atom_page *)atom->page_addr)->bitmap); } static void atom_del(struct kwork_atom *atom) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bd462a3f2bbd..b7fd7ec586fb 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3555,7 +3555,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp /* Return ENODEV is input cpu is greater than max cpu */ if ((unsigned long)cpu.cpu > mask->nbits) return -ENODEV; - set_bit(cpu.cpu, mask->bits); + __set_bit(cpu.cpu, mask->bits); } return 0; @@ -3627,8 +3627,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map pr_debug("nr_threads: %d\n", rec->nr_threads); for (t = 0; t < rec->nr_threads; t++) { - set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); - set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); + __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); + __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); if (verbose) { pr_debug("thread_masks[%d]: ", t); mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f93737eef07b..86e18575c9be 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { + if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 4965dd666956..0173f5402a35 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < perf_cpu_map__nr(map); i++) - set_bit(perf_cpu_map__cpu(map, i).cpu, bm); + __set_bit(perf_cpu_map__cpu(map, i).cpu, bm); } if (map) diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 4c96829510c9..a0e88c496107 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -33,7 +33,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) int i; perf_cpu_map__for_each_cpu(cpu, i, map) - set_bit(cpu.cpu, bm); + __set_bit(cpu.cpu, bm); } if (map) diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 4ee96b3c755b..38dc4524b7e8 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -58,14 +58,14 @@ void affinity__set(struct affinity *a, int cpu) return; a->changed = true; - set_bit(cpu, a->sched_cpus); + __set_bit(cpu, a->sched_cpus); /* * We ignore errors because affinity is just an optimization. * This could happen for example with isolated CPUs or cpusets. * In this case the IPIs inside the kernel's perf API still work. */ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus); - clear_bit(cpu, a->sched_cpus); + __clear_bit(cpu, a->sched_cpus); } static void __affinity__cleanup(struct affinity *a) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 98dfaf84bd13..dc2ae397d400 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -79,12 +79,12 @@ struct perf_file_attr { void perf_header__set_feat(struct perf_header *header, int feat) { - set_bit(feat, header->adds_features); + __set_bit(feat, header->adds_features); } void perf_header__clear_feat(struct perf_header *header, int feat) { - clear_bit(feat, header->adds_features); + __clear_bit(feat, header->adds_features); } bool perf_header__has_feat(const struct perf_header *header, int feat) @@ -1358,7 +1358,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) rewinddir(dir); for_each_memory(phys, dir) { - set_bit(phys, n->set); + __set_bit(phys, n->set); } closedir(dir); @@ -3952,7 +3952,7 @@ int perf_file_header__read(struct perf_file_header *header, if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { bitmap_zero(header->adds_features, HEADER_FEAT_BITS); - set_bit(HEADER_BUILD_ID, header->adds_features); + __set_bit(HEADER_BUILD_ID, header->adds_features); } } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a4dff881be39..49093b21ee2d 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -111,7 +111,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, i pr_err("Failed to allocate node mask for mbind: error %m\n"); return -1; } - set_bit(node_index, node_mask); + __set_bit(node_index, node_mask); if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", data, data + mmap_len, node_index); @@ -256,7 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) for (idx = 0; idx < nr_cpus; idx++) { cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu.cpu, mask->bits); + __set_bit(cpu.cpu, mask->bits); } } @@ -270,7 +270,7 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu.cpu, map->affinity_mask.bits); + __set_bit(map->core.cpu.cpu, map->affinity_mask.bits); return 0; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e9a4f31926bf..8ff6462f051e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1533,7 +1533,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS)); for (b = from; b <= to; b++) - set_bit(b, bits); + __set_bit(b, bits); } void perf_pmu__del_formats(struct list_head *formats) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a5d945415bbc..5b602b6d4685 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -365,7 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, sprintf(handler, "%s::%s", event->system, event->name); - if (!test_and_set_bit(event->id, events_defined)) + if (!__test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler, event->print_fmt.args); s = nsecs / NSEC_PER_SEC; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1cf65db8f861..d685a7399ee2 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -934,7 +934,7 @@ static void python_process_tracepoint(struct perf_sample *sample, sprintf(handler_name, "%s__%s", event->system, event->name); - if (!test_and_set_bit(event->id, events_defined)) + if (!__test_and_set_bit(event->id, events_defined)) define_event_symbols(event, handler_name, event->print_fmt.args); handler = get_handler(handler_name); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0e1a3d6bacb9..1facd4616317 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2749,7 +2749,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, goto out_delete_map; } - set_bit(cpu.cpu, cpu_bitmap); + __set_bit(cpu.cpu, cpu_bitmap); } err = 0; diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 1e0c731fc539..5c62d3118c41 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -741,7 +741,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) break; } - set_bit(c.cpu, cpumask_bits(b)); + __set_bit(c.cpu, cpumask_bits(b)); } perf_cpu_map__put(m); -- cgit v1.2.3 From 20ed9fa4965875fdde5bfd65d838465e38d46b22 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 30 Nov 2022 16:51:58 +0000 Subject: perf branch: Fix interpretation of branch records Commit 93315e46b000fc80 ("perf/core: Add speculation info to branch entries") added a new field in between type and new_type. Perf has its own copy of this struct so update it to match the kernel side. This doesn't currently cause any issues because new_type is only used by the Arm BRBE driver which isn't merged yet. Committer notes: Is this really an ABI? How are we supposed to deal with old perf.data files with new tools and vice versa? :-\ Fixes: 93315e46b000fc80 ("perf/core: Add speculation info to branch entries") Reviewed-by: Anshuman Khandual Signed-off-by: James Clark Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sandipan Das Link: https://lore.kernel.org/r/20221130165158.517385-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/branch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index d6017c9b1872..3ed792db1125 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -22,9 +22,10 @@ struct branch_flags { u64 abort:1; u64 cycles:16; u64 type:4; + u64 spec:2; u64 new_type:4; u64 priv:3; - u64 reserved:33; + u64 reserved:31; }; }; }; -- cgit v1.2.3 From 7e8e5e879729b238cd17dcc292d86c1e6cc83ecc Mon Sep 17 00:00:00 2001 From: Hans-Peter Nilsson Date: Mon, 28 Dec 2020 03:39:41 +0100 Subject: perf arm64: Fix mksyscalltbl, don't lose syscalls due to sort -nu When using "sort -nu", arm64 syscalls were lost. That is, the io_setup syscall (number 0) and all but one (typically ftruncate; 64) of the syscalls that are defined symbolically (like "#define __NR_ftruncate __NR3264_ftruncate") at the point where "sort" is applied. This creation-of-syscalls.c-scheme is, judging from comments, copy-pasted from powerpc, and worked there because at the time, its tools/arch/powerpc/include/uapi/asm/unistd.h had *literals*, like "#define __NR_ftruncate 93". With sort being numeric and the non-numeric key effectively evaluating to 0, the sort option "-u" means these "duplicates" are removed. There's no need to remove syscall lines with duplicate numbers for arm64 because there are none, so let's fix that by just losing the "-u". Having the table numerically sorted on syscall-number for the rest of the syscalls looks nice, so keep the "-n". Reviewed-by: Leo Yan Signed-off-by: Hans-Peter Nilsson Tested-by: Leo Yan Acked-by: Arnd Bergmann Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20201228023941.E0DE2203B5@pchp3.se.axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index 459469b7222c..a7ca48d1e37b 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -58,5 +58,5 @@ create_table() $gcc -E -dM -x c -I $incpath/include/uapi $input \ |sed -ne 's/^#define __NR_//p' \ - |sort -t' ' -k2 -nu \ + |sort -t' ' -k2 -n \ |create_table -- cgit v1.2.3 From eb0b3f501e4dd5e6668bddbbb3c20ecd1545b60d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Nov 2022 11:36:13 -0800 Subject: Revert "perf stat: Rename "aggregate-number" to "cpu-count" in JSON" This reverts commit c4b41b83c25073c09bfcc4e5ec496c9dd316656b. As Ian said, the "cpu-count" is not appropriate for uncore events, also it caused a perf test failure. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221130193613.1046804-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 847acdb5dc40..f1ee4b052198 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -281,19 +281,19 @@ static void print_aggr_id_json(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", id.socket, id.die, id.core, nr); break; case AGGR_DIE: - fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", id.socket, id.die, nr); break; case AGGR_SOCKET: - fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", id.socket, nr); break; case AGGR_NODE: - fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ", + fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", id.node, nr); break; case AGGR_NONE: -- cgit v1.2.3 From 3c97d25ceb75fd3e660ba9fcd4c630d0b057a5a2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 5 Dec 2022 09:34:04 -0300 Subject: perf stat: Check existence of os->prefix, fixing a segfault We need to check if we have a OS prefix, otherwise we stumble on a metric segv that I'm now seeing in Arnaldo's tree: $ gdb --args perf stat -M Backend true ... Performance counter stats for 'true': 4,712,355 TOPDOWN.SLOTS # 17.3 % tma_core_bound Program received signal SIGSEGV, Segmentation fault. __strlen_evex () at ../sysdeps/x86_64/multiarch/strlen-evex.S:77 77 ../sysdeps/x86_64/multiarch/strlen-evex.S: No such file or directory. (gdb) bt #0 __strlen_evex () at ../sysdeps/x86_64/multiarch/strlen-evex.S:77 #1 0x00007ffff74749a5 in __GI__IO_fputs (str=0x0, fp=0x7ffff75f5680 <_IO_2_1_stderr_>) #2 0x0000555555779f28 in do_new_line_std (config=0x555555e077c0 , os=0x7fffffffbf10) at util/stat-display.c:356 #3 0x000055555577a081 in print_metric_std (config=0x555555e077c0 , ctx=0x7fffffffbf10, color=0x0, fmt=0x5555558b77b5 "%8.1f", unit=0x7fffffffbb10 "% tma_memory_bound", val=13.165355724442199) at util/stat-display.c:380 #4 0x00005555557768b6 in generic_metric (config=0x555555e077c0 , metric_expr=0x55555593d5b7 "((CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES))"..., metric_events=0x555555f334e0, metric_refs=0x555555ec81d0, name=0x555555f32e80 "TOPDOWN.SLOTS", metric_name=0x555555f26c80 "tma_memory_bound", metric_unit=0x55555593d5b1 "100%", runtime=0, map_idx=0, out=0x7fffffffbd90, st=0x555555e9e620 ) at util/stat-shadow.c:934 #5 0x0000555555778cac in perf_stat__print_shadow_stats (config=0x555555e077c0 , evsel=0x555555f289d0, avg=4712355, map_idx=0, out=0x7fffffffbd90, metric_events=0x555555e078e8 , st=0x555555e9e620 ) at util/stat-shadow.c:1329 #6 0x000055555577b6a0 in printout (config=0x555555e077c0 , os=0x7fffffffbf10, uval=4712355, run=325322, ena=325322, noise=4712355, map_idx=0) at util/stat-display.c:741 #7 0x000055555577bc74 in print_counter_aggrdata (config=0x555555e077c0 , counter=0x555555f289d0, s=0, os=0x7fffffffbf10) at util/stat-display.c:838 #8 0x000055555577c1d8 in print_counter (config=0x555555e077c0 , counter=0x555555f289d0, os=0x7fffffffbf10) at util/stat-display.c:957 #9 0x000055555577dba0 in evlist__print_counters (evlist=0x555555ec3610, config=0x555555e077c0 , _target=0x555555e01c80 , ts=0x0, argc=1, argv=0x7fffffffe450) at util/stat-display.c:1413 #10 0x00005555555fc821 in print_counters (ts=0x0, argc=1, argv=0x7fffffffe450) at builtin-stat.c:1040 #11 0x000055555560091a in cmd_stat (argc=1, argv=0x7fffffffe450) at builtin-stat.c:2665 #12 0x00005555556b1eea in run_builtin (p=0x555555e11f70 , argc=4, argv=0x7fffffffe450) at perf.c:322 #13 0x00005555556b2181 in handle_internal_command (argc=4, argv=0x7fffffffe450) at perf.c:376 #14 0x00005555556b22d7 in run_argv (argcp=0x7fffffffe27c, argv=0x7fffffffe270) at perf.c:420 #15 0x00005555556b26ef in main (argc=4, argv=0x7fffffffe450) at perf.c:550 (gdb) Fixes: f123b2d84ecec9a3 ("perf stat: Remove prefix argument in print_metric_headers()") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: http://lore.kernel.org/lkml/CAP-5=fUOjSM5HajU9TCD6prY39LbX4OQbkEbtKPPGRBPBN=_VQ@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index f1ee4b052198..9b7772e6abf6 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -353,7 +353,8 @@ static void do_new_line_std(struct perf_stat_config *config, struct outstate *os) { fputc('\n', os->fh); - fputs(os->prefix, os->fh); + if (os->prefix) + fputs(os->prefix, os->fh); aggr_printout(config, os->evsel, os->id, os->nr); if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); -- cgit v1.2.3 From 955f6def5590ce6ca11a1c1ced0d2d1c95421059 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 5 Dec 2022 12:14:43 +0530 Subject: perf record: Add remaining branch filters: "no_cycles", "no_flags" & "hw_index" This adds all remaining branch filters i.e "no_cycles", "no_flags" and "hw_index". While here, also updates the documentation. Signed-off-by: Anshuman Khandual Cc: James Clark Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20221205064443.533587-1-anshuman.khandual@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 5 +++++ tools/perf/util/parse-branch-options.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9ea6d44aca58..5f9fa07b3dde 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -388,6 +388,7 @@ following filters are defined: - any_call: any function call or system call - any_ret: any function return or system call return - ind_call: any indirect branch + - ind_jmp: any indirect jump - call: direct calls, including far (to/from kernel) calls - u: only when the branch target is at the user level - k: only when the branch target is in the kernel @@ -396,6 +397,10 @@ following filters are defined: - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort - cond: conditional branches + - call_stack: save call stack + - no_flags: don't save branch flags e.g prediction, misprediction etc + - no_cycles: don't save branch cycles + - hw_index: save branch hardware index - save_type: save branch type during sampling in case binary is not available later For the platforms with Intel Arch LBR support (12th-Gen+ client or 4th-Gen Xeon+ server), the save branch type is unconditionally enabled diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 31faf2bb49ff..fd67d204d720 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -30,8 +30,11 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), + BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS), + BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES), BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), + BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), BRANCH_END }; -- cgit v1.2.3 From 8f4b1e3cebce5d12048409393de751e4d663ce42 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 5 Dec 2022 09:58:52 +0530 Subject: perf stat: Fix printing field separator in CSV metrics output In 'perf stat' with CSV output option, number of fields in metrics output is not matching with number of fields in other event output lines. Sample output below after applying patch to fix printing os->prefix. # ./perf stat -x, --per-socket -a -C 1 ls S0,1,82.11,msec,cpu-clock,82111626,100.00,1.000,CPUs utilized S0,1,2,,context-switches,82109314,100.00,24.358,/sec ------ ====> S0,1,,,,,,,1.71,stalled cycles per insn The above command line uses field separator as "," via "-x," option and per-socket option displays socket value as first field. But here the last line for "stalled cycles per insn" has more separators. Each csv output line is expected to have 8 field separators (for the 9 fields), where as last line has 9 "," in the result. Patch fixes this issue. The counter stats are displayed by function "perf_stat__print_shadow_stats" in code "util/stat-shadow.c". While printing the stats info for "stalled cycles per insn", function "new_line_csv" is used as new_line callback. The fields printed in each line contains: "Socket_id,aggr nr,Avg,unit,event_name,run,enable_percent,ratio,unit" The metric output prints Socket_id, aggr nr, ratio and unit. It has to skip through remaining five fields ie, Avg,unit,event_name,run,enable_percent. The csv line callback uses "os->nfields" to know the number of fields to skip to match with other lines. Currently it is set as: os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); But in case of aggregation modes, csv_sep already gets printed along with each field (Function "aggr_printout" in util/stat-display.c). So aggr_fields can be removed from nfields. And fixed number of fields to skip has to be "4". This is to skip fields for: "avg, unit, event name, run, enable_percent" This needs 4 csv separators. Patch removes aggr_fields and uses 4 as fixed number of os->nfields to skip. After the patch: # ./perf stat -x, --per-socket -a -C 1 ls S0,1,79.08,msec,cpu-clock,79085956,100.00,1.000,CPUs utilized S0,1,7,,context-switches,79084176,100.00,88.514,/sec ------ ====> S0,1,,,,,,0.81,stalled cycles per insn Fixes: 92a61f6412d3a09d ("perf stat: Implement CSV metrics output") Reported-by: Disha Goel Reviewed-by: Kajol Jain Signed-off-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Tested-by: Disha Goel Cc: Andi Kleen Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20221205042852.83382-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9b7772e6abf6..660e4f6616f5 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -687,20 +687,9 @@ static void printout(struct perf_stat_config *config, struct outstate *os, struct evsel *counter = os->evsel; if (config->csv_output) { - static const int aggr_fields[AGGR_MAX] = { - [AGGR_NONE] = 1, - [AGGR_GLOBAL] = 0, - [AGGR_SOCKET] = 2, - [AGGR_DIE] = 2, - [AGGR_CORE] = 2, - [AGGR_THREAD] = 1, - [AGGR_UNSET] = 0, - [AGGR_NODE] = 1, - }; - pm = config->metric_only ? print_metric_only_csv : print_metric_csv; nl = config->metric_only ? new_line_metric : new_line_csv; - os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0); + os->nfields = 4 + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; nl = config->metric_only ? new_line_metric : new_line_json; -- cgit v1.2.3 From 1849f9f00926c54fa284be3b7f801de8b010572b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:39 -0800 Subject: tools lib api: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 3649c7f7ea65..044860ac1ed1 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -88,10 +88,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: $(LIBFILE) @@ -99,14 +99,28 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libapi_headers) \ - $(call do_install,cpu.h,$(prefix)/include/api,644); \ - $(call do_install,debug.h,$(prefix)/include/api,644); \ - $(call do_install,io.h,$(prefix)/include/api,644); \ - $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ - $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \ - $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); +HDRS := cpu.h debug.h io.h +FD_HDRS := fd/array.h +FS_HDRS := fs/fs.h fs/tracing_path.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) +INSTALL_FD_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FD_HDRS)) +INSTALL_FS_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FS_HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +$(INSTALL_FD_HDRS): $(INSTALL_HDRS_PFX)/fd/%.h: fd/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/fd/,644) + +$(INSTALL_FS_HDRS): $(INSTALL_HDRS_PFX)/fs/%.h: fs/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/fs/,644) + +install_headers: $(INSTALL_HDRS) $(INSTALL_FD_HDRS) $(INSTALL_FS_HDRS) + $(call QUIET_INSTALL, libapi_headers) install: install_lib install_headers -- cgit v1.2.3 From 47e02b94a4c98dcc8072e56efaae5057174050fa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:40 -0800 Subject: tools lib perf: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Makefile | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index a90fb8c6bed4..30b7f91e7147 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -176,10 +176,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: libs @@ -187,23 +187,24 @@ install_lib: libs $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libperf_headers) \ - $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \ - $(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644); +HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h +INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h + +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) +INSTALL_INTERNAL_HDRS_PFX := $(DESTDIR)$(prefix)/include/internal +INSTALL_INTERNAL_HDRS := $(addprefix $(INSTALL_INTERNAL_HDRS_PFX)/, $(INTERNAL_HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: include/perf/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +$(INSTALL_INTERNAL_HDRS): $(INSTALL_INTERNAL_HDRS_PFX)/%.h: include/internal/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_INTERNAL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) $(INSTALL_INTERNAL_HDRS) + $(call QUIET_INSTALL, libperf_headers) install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ -- cgit v1.2.3 From 5d890591db6bed8ca69bd4bfe0cdaca372973033 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:41 -0800 Subject: tools lib subcmd: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/Makefile | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 9a316d8b89df..b87213263a5e 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -89,10 +89,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: $(LIBFILE) @@ -100,13 +100,16 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libsubcmd_headers) \ - $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ - $(call do_install,help.h,$(prefix)/include/subcmd,644); \ - $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ - $(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \ - $(call do_install,run-command.h,$(prefix)/include/subcmd,644); +HDRS := exec-cmd.h help.h pager.h parse-options.h run-command.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/subcmd +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) + $(call QUIET_INSTALL, libsubcmd_headers) install: install_lib install_headers -- cgit v1.2.3 From 113bb3964297467baeb1fd2c4f86d0a4142e4259 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 Dec 2022 20:57:42 -0800 Subject: tools lib symbol: Add dependency test to install_headers Compute the headers to be installed from their source headers and make each have its own build target to install it. Using dependencies avoids headers being reinstalled and getting a new timestamp which then causes files that depend on the header to be rebuilt. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Nicolas Schier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20221202045743.2639466-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/symbol/Makefile | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile index ea8707b3442a..13d43c6f92b4 100644 --- a/tools/lib/symbol/Makefile +++ b/tools/lib/symbol/Makefile @@ -89,10 +89,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: $(LIBFILE) @@ -100,9 +100,16 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, libsymbol_headers) \ - $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); +HDRS := kallsyms.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/symbol +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) + $(call QUIET_INSTALL, libsymbol_headers) install: install_lib install_headers -- cgit v1.2.3 From 117195d9f8af74c65bf57d9b56f496b5b3655bcb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 2 Dec 2022 11:04:47 -0800 Subject: perf stat: Fix multi-line metric output in JSON When a metric produces more than one values, it missed to print the opening bracket. Fixes: ab6baaae27357290 ("perf stat: Fix JSON output in metric-only mode") Reported-by: Weilin Wang Signed-off-by: Namhyung Kim Tested-by: Weilin Wang Cc: Adrian Hunter Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221202190447.1588680-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 660e4f6616f5..7f885b04c23f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -441,7 +441,7 @@ static void new_line_json(struct perf_stat_config *config, void *ctx) { struct outstate *os = ctx; - fputc('\n', os->fh); + fputs("\n{", os->fh); if (os->prefix) fprintf(os->fh, "%s", os->prefix); aggr_printout(config, os->evsel, os->id, os->nr); -- cgit v1.2.3 From 3f81f72d30b46efb614d93f430684c0deb8439b7 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 30 Nov 2022 11:15:21 +0000 Subject: perf stat: Fix invalid output handle In this context, 'os' is already a pointer so the extra dereference isn't required. This fixes the following test failure on aarch64: $ ./perf test "json output" -vvv 92: perf stat JSON output linter : --- start --- Checking json output: no args Test failed for input: ... Fatal error: glibc detected an invalid stdio handle ---- end ---- perf stat JSON output linter: FAILED! Fixes: e7f4da312259e618 ("perf stat: Pass struct outstate to printout()") Signed-off-by: James Clark Tested-by: Athira Jajeev Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221130111521.334152-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7f885b04c23f..ead4915c4a03 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -731,7 +731,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, perf_stat__print_shadow_stats(config, counter, uval, map_idx, &out, &config->metric_events, &rt_stat); } else { - pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); } if (!config->metric_only) { -- cgit v1.2.3 From fce9a619145181ca6a41253f7de3df56b1e4ad59 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 29 Nov 2022 22:29:32 -0800 Subject: perf util: Make header guard consistent with tool Remove git reference by changing GIT_COMPAT_UTIL_H to __PERF_UTIL_H. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20221130062935.2219247-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c1f2d423a9ec..63cdab0e5314 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef GIT_COMPAT_UTIL_H -#define GIT_COMPAT_UTIL_H +#ifndef __PERF_UTIL_H +#define __PERF_UTIL_H #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ @@ -94,4 +94,4 @@ int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x, 0; \ }) -#endif /* GIT_COMPAT_UTIL_H */ +#endif /* __PERF_UTIL_H */ -- cgit v1.2.3 From 5b7a29fb0b7d67e5d40cd6557e073afb6a7466ab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 29 Nov 2022 22:29:33 -0800 Subject: perf util: Add host_is_bigendian to util.h Avoid libtraceevent dependency for tep_is_bigendian or trace-event.h dependency for bigendian. Add a new host_is_bigendian to util.h, using the compiler defined __BYTE_ORDER__ when available. Committer notes: Added: #else /* !__BYTE_ORDER__ */ On that nested #ifdef block, as per Namhyung's suggestion. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20221130062935.2219247-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 4 ++-- tools/perf/tests/sample-parsing.c | 4 ++-- tools/perf/util/evsel.c | 5 +---- tools/perf/util/trace-event-info.c | 14 +++----------- tools/perf/util/trace-event-read.c | 3 ++- tools/perf/util/trace-event.h | 2 -- tools/perf/util/util.h | 19 +++++++++++++++++++ 7 files changed, 29 insertions(+), 22 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 95feb6ef34a0..cb8cd09938d5 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -16,7 +16,6 @@ #include "dso.h" #include "env.h" #include "parse-events.h" -#include "trace-event.h" #include "evlist.h" #include "evsel.h" #include "thread_map.h" @@ -28,6 +27,7 @@ #include "util/mmap.h" #include "util/string2.h" #include "util/synthetic-events.h" +#include "util/util.h" #include "thread.h" #include "tests.h" @@ -79,7 +79,7 @@ static size_t read_objdump_chunk(const char **line, unsigned char **buf, * see disassemble_bytes() at binutils/objdump.c for details * how objdump chooses display endian) */ - if (bytes_read > 1 && !bigendian()) { + if (bytes_read > 1 && !host_is_bigendian()) { unsigned char *chunk_end = chunk_start + bytes_read - 1; unsigned char tmp; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 20930dd48ee0..927c7f0cc4cc 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -13,7 +13,7 @@ #include "evsel.h" #include "debug.h" #include "util/synthetic-events.h" -#include "util/trace-event.h" +#include "util/util.h" #include "tests.h" @@ -117,7 +117,7 @@ static bool samples_same(const struct perf_sample *s1, COMP(branch_stack->hw_idx); for (i = 0; i < s1->branch_stack->nr; i++) { if (needs_swap) - return ((tep_is_bigendian()) ? + return ((host_is_bigendian()) ? (FLAG(s2).value == BS_EXPECTED_BE) : (FLAG(s2).value == BS_EXPECTED_LE)); else diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 45f4f08399ae..0f617359a82f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2320,11 +2320,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) * as it has variable bit-field sizes. Instead the * macro takes the bit-field position/size, * swaps it based on the host endianness. - * - * tep_is_bigendian() is used here instead of - * bigendian() to avoid python test fails. */ - if (tep_is_bigendian()) { + if (host_is_bigendian()) { new_val = bitfield_swap(value, 0, 1); new_val |= bitfield_swap(value, 1, 1); new_val |= bitfield_swap(value, 2, 1); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 892c323b4ac9..c24b3a15e319 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -26,6 +26,7 @@ #include #include "evsel.h" #include "debug.h" +#include "util.h" #define VERSION "0.6" #define MAX_EVENT_LENGTH 512 @@ -38,15 +39,6 @@ struct tracepoint_path { struct tracepoint_path *next; }; -int bigendian(void) -{ - unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; - unsigned int *ptr; - - ptr = (unsigned int *)(void *)str; - return *ptr == 0x01020304; -} - /* unfortunately, you can not stat debugfs or proc files for size */ static int record_file(const char *file, ssize_t hdr_sz) { @@ -79,7 +71,7 @@ static int record_file(const char *file, ssize_t hdr_sz) /* ugh, handle big-endian hdr_size == 4 */ sizep = (char*)&size; - if (bigendian()) + if (host_is_bigendian()) sizep += sizeof(u64) - hdr_sz; if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) { @@ -564,7 +556,7 @@ static int tracing_data_header(void) return -1; /* save endian */ - if (bigendian()) + if (host_is_bigendian()) buf[0] = 1; else buf[0] = 0; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a01af783310..43146a4ce2fb 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -17,6 +17,7 @@ #include "trace-event.h" #include "debug.h" +#include "util.h" static int input_fd; @@ -414,7 +415,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) return -1; } file_bigendian = buf[0]; - host_bigendian = bigendian(); + host_bigendian = host_is_bigendian() ? 1 : 0; if (trace_event__init(tevent)) { pr_debug("trace_event__init failed"); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 640981105788..8f39f5bcb2c2 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -27,8 +27,6 @@ trace_event__tp_format(const char *sys, const char *name); struct tep_event *trace_event__tp_format_id(int id); -int bigendian(void); - void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 63cdab0e5314..1d3b300af5a1 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -94,4 +94,23 @@ int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x, 0; \ }) +static inline bool host_is_bigendian(void) +{ +#ifdef __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return false; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return true; +#else +#error "Unrecognized __BYTE_ORDER__" +#endif +#else /* !__BYTE_ORDER__ */ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; + unsigned int *ptr; + + ptr = (unsigned int *)(void *)str; + return *ptr == 0x01020304; +#endif +} + #endif /* __PERF_UTIL_H */ -- cgit v1.2.3 From 336b92da1aa4228a664f27972f61e6186f369e79 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 6 Dec 2022 10:02:36 +0530 Subject: perf tool: Move pmus list variable to a new file The 'pmus' list variable is defined as static variable under pmu.c file. Introduce a new pmus.c file and migrate this variable to it. Also make it non static so that it can be accessed from outside. Suggested-by: Ian Rogers Signed-off-by: Ravi Bangoria Acked-by: Ian Rogers Acked-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Athira Jajeev Cc: Jiri Olsa Cc: Kajol Jain Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Thomas Richter Cc: carsten.haitzler@arm.com Link: https://lore.kernel.org/r/20221206043237.12159-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/pmu.c | 2 +- tools/perf/util/pmus.c | 5 +++++ tools/perf/util/pmus.h | 9 +++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/pmus.c create mode 100644 tools/perf/util/pmus.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ab37f588ee8b..d04802bfa23f 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -73,6 +73,7 @@ perf-y += trace-event-parse.o perf-y += parse-events-flex.o perf-y += parse-events-bison.o perf-y += pmu.o +perf-y += pmus.o perf-y += pmu-flex.o perf-y += pmu-bison.o perf-y += pmu-hybrid.o diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8ff6462f051e..2bdeb89352e7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -22,6 +22,7 @@ #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "pmus.h" #include "parse-events.h" #include "print-events.h" #include "header.h" @@ -58,7 +59,6 @@ struct perf_pmu_format { int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; -static LIST_HEAD(pmus); static bool hybrid_scanned; /* diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c new file mode 100644 index 000000000000..7f3b93c4d229 --- /dev/null +++ b/tools/perf/util/pmus.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +LIST_HEAD(pmus); diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h new file mode 100644 index 000000000000..5ec12007eb5c --- /dev/null +++ b/tools/perf/util/pmus.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PMUS_H +#define __PMUS_H + +extern struct list_head pmus; + +#define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list) + +#endif /* __PMUS_H */ -- cgit v1.2.3 From 9d9b22bedad13d96b34fe005ef44b4523c4eb786 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 6 Dec 2022 10:02:37 +0530 Subject: perf test: Add event group test for events in multiple PMUs Multiple events in a group can belong to one or more PMUs, however there are some limitations. One of the limitations is that perf doesn't allow creating a group of events from different hw PMUs. Write a simple test to create various combinations of hw, sw and uncore PMU events and verify group creation succeeds or fails as expected. Signed-off-by: Ravi Bangoria Acked-by: Ian Rogers Acked-by: Kan Liang Acked-by: Madhavan Srinivasan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Athira Jajeev Cc: Carsten Haitzler Cc: Jiri Olsa Cc: Kajol Jain Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Thomas Richter Link: https://lore.kernel.org/r/20221206043237.12159-3-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/event_groups.c | 127 ++++++++++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 130 insertions(+) create mode 100644 tools/perf/tests/event_groups.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 11b69023011b..658b5052c24d 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -67,6 +67,7 @@ perf-y += expand-cgroup.o perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o perf-y += sigtrap.o +perf-y += event_groups.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 4c6ae59a4dfd..ddd8262bfa26 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -110,6 +110,7 @@ static struct test_suite *generic_tests[] = { &suite__perf_time_to_tsc, &suite__dlfilter, &suite__sigtrap, + &suite__event_groups, NULL, }; diff --git a/tools/perf/tests/event_groups.c b/tools/perf/tests/event_groups.c new file mode 100644 index 000000000000..612c0444aaa8 --- /dev/null +++ b/tools/perf/tests/event_groups.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "linux/perf_event.h" +#include "tests.h" +#include "debug.h" +#include "pmu.h" +#include "pmus.h" +#include "header.h" +#include "../perf-sys.h" + +/* hw: cycles, sw: context-switch, uncore: [arch dependent] */ +static int types[] = {0, 1, -1}; +static unsigned long configs[] = {0, 3, 0}; + +#define NR_UNCORE_PMUS 5 + +/* Uncore pmus that support more than 3 counters */ +static struct uncore_pmus { + const char *name; + __u64 config; +} uncore_pmus[NR_UNCORE_PMUS] = { + { "amd_l3", 0x0 }, + { "amd_df", 0x0 }, + { "uncore_imc_0", 0x1 }, /* Intel */ + { "core_imc", 0x318 }, /* PowerPC: core_imc/CPM_STCX_FIN/ */ + { "hv_24x7", 0x22000000003 }, /* PowerPC: hv_24x7/CPM_STCX_FIN/ */ +}; + +static int event_open(int type, unsigned long config, int group_fd) +{ + struct perf_event_attr attr; + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.type = type; + attr.size = sizeof(struct perf_event_attr); + attr.config = config; + /* + * When creating an event group, typically the group leader is + * initialized with disabled set to 1 and any child events are + * initialized with disabled set to 0. Despite disabled being 0, + * the child events will not start until the group leader is + * enabled. + */ + attr.disabled = group_fd == -1 ? 1 : 0; + + return sys_perf_event_open(&attr, -1, 0, group_fd, 0); +} + +static int setup_uncore_event(void) +{ + struct perf_pmu *pmu; + int i; + + if (list_empty(&pmus)) + perf_pmu__scan(NULL); + + perf_pmus__for_each_pmu(pmu) { + for (i = 0; i < NR_UNCORE_PMUS; i++) { + if (!strcmp(uncore_pmus[i].name, pmu->name)) { + pr_debug("Using %s for uncore pmu event\n", pmu->name); + types[2] = pmu->type; + configs[2] = uncore_pmus[i].config; + return 0; + } + } + } + return -1; +} + +static int run_test(int i, int j, int k) +{ + int erroneous = ((((1 << i) | (1 << j) | (1 << k)) & 5) == 5); + int group_fd, sibling_fd1, sibling_fd2; + + group_fd = event_open(types[i], configs[i], -1); + if (group_fd == -1) + return -1; + + sibling_fd1 = event_open(types[j], configs[j], group_fd); + if (sibling_fd1 == -1) { + close(group_fd); + return erroneous ? 0 : -1; + } + + sibling_fd2 = event_open(types[k], configs[k], group_fd); + if (sibling_fd2 == -1) { + close(sibling_fd1); + close(group_fd); + return erroneous ? 0 : -1; + } + + close(sibling_fd2); + close(sibling_fd1); + close(group_fd); + return erroneous ? -1 : 0; +} + +static int test__event_groups(struct test_suite *text __maybe_unused, int subtest __maybe_unused) +{ + int i, j, k; + int ret; + int r; + + ret = setup_uncore_event(); + if (ret || types[2] == -1) + return TEST_SKIP; + + ret = TEST_OK; + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + for (k = 0; k < 3; k++) { + r = run_test(i, j, k); + if (r) + ret = TEST_FAIL; + + pr_debug("0x%x 0x%lx, 0x%x 0x%lx, 0x%x 0x%lx: %s\n", + types[i], configs[i], types[j], configs[j], + types[k], configs[k], r ? "Fail" : "Pass"); + } + } + } + return ret; +} + +DEFINE_SUITE("Event groups", event_groups); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e15f24cfc909..fb4b5ad4dd0f 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -147,6 +147,7 @@ DECLARE_SUITE(expand_cgroup_events); DECLARE_SUITE(perf_time_to_tsc); DECLARE_SUITE(dlfilter); DECLARE_SUITE(sigtrap); +DECLARE_SUITE(event_groups); /* * PowerPC and S390 do not support creation of instruction breakpoints using the -- cgit v1.2.3 From cc2367eebb0c3c5501cddd5823e5feda7b57f706 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 6 Dec 2022 13:49:04 -0300 Subject: machine: Adopt is_lock_function() from builtin-lock.c It is used in bpf_lock_contention.c and builtin-lock.c will be made CONFIG_LIBTRACEEVENT=y conditional, so move it to machine.c, that is always available. This makes those 4 global variables for sched and lock text start and end to move to 'struct machine' too, as conceivably we can have that info for several machine instances, say some 'perf diff' like tool. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 58 ++--------------------------------- tools/perf/util/bpf_lock_contention.c | 2 +- tools/perf/util/lock-contention.h | 2 -- tools/perf/util/machine.c | 40 ++++++++++++++++++++++++ tools/perf/util/machine.h | 5 +++ 5 files changed, 48 insertions(+), 59 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 0d280093b19a..15ce6358f127 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -67,11 +67,6 @@ static enum { LOCK_AGGR_CALLER, } aggr_mode = LOCK_AGGR_ADDR; -static u64 sched_text_start; -static u64 sched_text_end; -static u64 lock_text_start; -static u64 lock_text_end; - static struct thread_stat *thread_stat_find(u32 tid) { struct rb_node *node; @@ -854,55 +849,6 @@ end: return 0; } -bool is_lock_function(struct machine *machine, u64 addr) -{ - if (!sched_text_start) { - struct map *kmap; - struct symbol *sym; - - sym = machine__find_kernel_symbol_by_name(machine, - "__sched_text_start", - &kmap); - if (!sym) { - /* to avoid retry */ - sched_text_start = 1; - return false; - } - - sched_text_start = kmap->unmap_ip(kmap, sym->start); - - /* should not fail from here */ - sym = machine__find_kernel_symbol_by_name(machine, - "__sched_text_end", - &kmap); - sched_text_end = kmap->unmap_ip(kmap, sym->start); - - sym = machine__find_kernel_symbol_by_name(machine, - "__lock_text_start", - &kmap); - lock_text_start = kmap->unmap_ip(kmap, sym->start); - - sym = machine__find_kernel_symbol_by_name(machine, - "__lock_text_end", - &kmap); - lock_text_end = kmap->unmap_ip(kmap, sym->start); - } - - /* failed to get kernel symbols */ - if (sched_text_start == 1) - return false; - - /* mutex and rwsem functions are in sched text section */ - if (sched_text_start <= addr && addr < sched_text_end) - return true; - - /* spinlock functions are in lock text section */ - if (lock_text_start <= addr && addr < lock_text_end) - return true; - - return false; -} - static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip, char *buf, int size) { @@ -961,7 +907,7 @@ static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sampl goto next; sym = node->ms.sym; - if (sym && !is_lock_function(machine, node->ip)) { + if (sym && !machine__is_lock_function(machine, node->ip)) { get_symbol_name_offset(node->ms.map, sym, node->ip, buf, size); return 0; @@ -1007,7 +953,7 @@ static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) if (++skip <= stack_skip) goto next; - if (node->ms.sym && is_lock_function(machine, node->ip)) + if (node->ms.sym && machine__is_lock_function(machine, node->ip)) goto next; hash ^= hash_long((unsigned long)node->ip, 64); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 4db9ad3d50c4..f4ebb9a2e380 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -153,7 +153,7 @@ int lock_contention_read(struct lock_contention *con) bpf_map_lookup_elem(stack, &key, stack_trace); /* skip lock internal functions */ - while (is_lock_function(machine, stack_trace[idx]) && + while (machine__is_lock_function(machine, stack_trace[idx]) && idx < con->max_stack - 1) idx++; diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index e3c061b1795b..a2346875098d 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -145,6 +145,4 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse #endif /* HAVE_BPF_SKEL */ -bool is_lock_function(struct machine *machine, u64 addr); - #endif /* PERF_LOCK_CONTENTION_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 76316e459c3d..803c9d1803dd 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -3336,3 +3336,43 @@ int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, voi } return err; } + +bool machine__is_lock_function(struct machine *machine, u64 addr) +{ + if (!machine->sched.text_start) { + struct map *kmap; + struct symbol *sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_start", &kmap); + + if (!sym) { + /* to avoid retry */ + machine->sched.text_start = 1; + return false; + } + + machine->sched.text_start = kmap->unmap_ip(kmap, sym->start); + + /* should not fail from here */ + sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap); + machine->sched.text_end = kmap->unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_start", &kmap); + machine->lock.text_start = kmap->unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_end", &kmap); + machine->lock.text_end = kmap->unmap_ip(kmap, sym->start); + } + + /* failed to get kernel symbols */ + if (machine->sched.text_start == 1) + return false; + + /* mutex and rwsem functions are in sched text section */ + if (machine->sched.text_start <= addr && addr < machine->sched.text_end) + return true; + + /* spinlock functions are in lock text section */ + if (machine->lock.text_start <= addr && addr < machine->lock.text_end) + return true; + + return false; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 6267c1d6f232..d034ecaf89c1 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -56,6 +56,10 @@ struct machine { struct maps *kmaps; struct map *vmlinux_map; u64 kernel_start; + struct { + u64 text_start; + u64 text_end; + } sched, lock; pid_t *current_tid; size_t current_tid_sz; union { /* Tool specific area */ @@ -212,6 +216,7 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; } +bool machine__is_lock_function(struct machine *machine, u64 addr); bool machine__is(struct machine *machine, const char *arch); bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); -- cgit v1.2.3 From 616aa32d6f221faa0235d2586ec1706dca70a439 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 5 Dec 2022 14:59:38 -0800 Subject: perf build: Fixes for LIBTRACEEVENT_DYNAMIC If LIBTRACEEVENT_DYNAMIC is enabled then avoid the install step for the plugins. If disabled correct DESTDIR so that the plugins are installed under /traceevent/plugins. Fixes: ef019df01e207971 ("perf build: Install libtraceevent locally when building") Reported-by: Alexander Gordeev Signed-off-by: Ian Rogers Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20221205225940.3079667-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a17a6ea85e81..6689f644782f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -884,7 +884,7 @@ $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + DESTDIR=$(DESTDIR_SQ)$(prefix) prefix= \ $(LIBTRACEEVENT_FLAGS) install endif @@ -1093,7 +1093,11 @@ install-tests: all install-gtk $(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' $(Q)$(MAKE) -C tests/shell/coresight install-tests -install-bin: install-tools install-tests install-traceevent-plugins +install-bin: install-tools install-tests + +ifndef LIBTRACEEVENT_DYNAMIC +install-bin: install-traceevent-plugins +endif install: install-bin try-install-man -- cgit v1.2.3 From b897613510890d6e92b6a276a20f6c3d96fe90e8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Dec 2022 09:58:04 -0800 Subject: perf stat: Update event skip condition for system-wide per-thread mode and merged uncore and hybrid events In print_counter_aggrdata(), it skips some events that has no aggregate count. It's actually for system-wide per-thread mode and merged uncore and hybrid events. Let's update the condition to check them explicitly. Fixes: 91f85f98da7ab8c3 ("perf stat: Display event stats using aggr counts") Reported-by: Athira Jajeev Signed-off-by: Namhyung Kim Acked-by: Athira Jajeev Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20221206175804.391387-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ead4915c4a03..8bd8b0142630 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -804,7 +804,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config, os->nr = aggr->nr; os->evsel = counter; - if (counter->supported && aggr->nr == 0) + /* Skip already merged uncore/hybrid events */ + if (counter->merged_stat) return; uniquify_counter(config, counter); @@ -813,6 +814,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; + /* + * Skip value 0 when enabling --per-thread globally, otherwise it will + * have too many 0 output. + */ + if (val == 0 && config->aggr_mode == AGGR_THREAD && config->system_wide) + return; + if (!metric_only) { if (config->json_output) fputc('{', output); @@ -889,9 +897,6 @@ static void print_aggr(struct perf_stat_config *config, print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { - if (counter->merged_stat) - continue; - print_counter_aggrdata(config, counter, s, os); } print_metric_end(config, os); @@ -918,9 +923,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config, print_metric_begin(config, evlist, os, s); evlist__for_each_entry(evlist, counter) { - if (counter->merged_stat) - continue; - if (counter->cgrp != os->cgrp) continue; @@ -940,9 +942,6 @@ static void print_counter(struct perf_stat_config *config, if (!config->aggr_map) return; - if (counter->merged_stat) - return; - for (s = 0; s < config->aggr_map->nr; s++) { print_counter_aggrdata(config, counter, s, os); } -- cgit v1.2.3 From 40769665b63d8c84b5b1c63fee404d4c20cff751 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 6 Dec 2022 21:59:08 -0800 Subject: perf jevents: Parse metrics during conversion Currently the 'MetricExpr' json value is passed from the json file to the pmu-events.c. This change introduces an expression tree that is parsed into. The parsing is done largely by using operator overloading and python's 'eval' function. Two advantages in doing this are: 1) Broken metrics fail at compile time rather than relying on `perf test` to detect. `perf test` remains relevant for checking event encoding and actual metric use. 2) The conversion to a string from the tree can minimize the metric's string size, for example, preferring 1e6 over 1000000, avoiding multiplication by 1 and removing unnecessary whitespace. On x86 this reduces the string size by 2,930bytes (0.07%). In future changes it would be possible to programmatically generate the json expressions (a single line of text and so a pain to write manually) for an architecture using the expression tree. This could avoid copy-pasting metrics for all architecture variants. v4. Doesn't simplify "0*SLOTS" to 0, as the pattern is used to fix Intel metrics with topdown events. v3. Avoids generic types on standard types like set that aren't supported until Python 3.9, fixing an issue with Python 3.6 reported-by John Garry. v3 also fixes minor pylint issues and adds a call to Simplify on the read expression tree. v2. Improvements to type information. Committer notes: Added one-line fixer from Ian, see first Link: tag below. Signed-off-by: Ian Rogers Reviewed-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Link: https://lore.kernel.org/r/CAP-5=fWa=zNK_ecpWGoGggHCQx7z-oW0eGMQf19Maywg0QK=4g@mail.gmail.com Link: https://lore.kernel.org/r/20221207055908.1385448-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/Build | 2 +- tools/perf/pmu-events/jevents.py | 12 +- tools/perf/pmu-events/metric.py | 502 +++++++++++++++++++++++++++++++++++ tools/perf/pmu-events/metric_test.py | 157 +++++++++++ 4 files changed, 669 insertions(+), 4 deletions(-) create mode 100644 tools/perf/pmu-events/metric.py create mode 100644 tools/perf/pmu-events/metric_test.py diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 04ef95174660..15b9e8fdbffa 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -21,7 +21,7 @@ $(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c $(call rule_mkdir) $(Q)$(call echo-cmd,gen)cp $< $@ else -$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) +$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) pmu-events/metric.py $(call rule_mkdir) $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@ endif diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 0daa3e007528..4c398e0eeb2f 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -4,6 +4,7 @@ import argparse import csv import json +import metric import os import sys from typing import (Callable, Dict, Optional, Sequence, Set, Tuple) @@ -268,9 +269,10 @@ class JsonEvent: self.metric_name = jd.get('MetricName') self.metric_group = jd.get('MetricGroup') self.metric_constraint = jd.get('MetricConstraint') - self.metric_expr = jd.get('MetricExpr') - if self.metric_expr: - self.metric_expr = self.metric_expr.replace('\\', '\\\\') + self.metric_expr = None + if 'MetricExpr' in jd: + self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify() + arch_std = jd.get('ArchStdEvent') if precise and self.desc and '(Precise Event)' not in self.desc: extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise ' @@ -322,6 +324,10 @@ class JsonEvent: s = '' for attr in _json_event_attributes: x = getattr(self, attr) + if x and attr == 'metric_expr': + # Convert parsed metric expressions into a string. Slashes + # must be doubled in the file. + x = x.ToPerfJson().replace('\\', '\\\\') s += f'{x}\\000' if x else '\\000' return s diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py new file mode 100644 index 000000000000..4797ed4fd817 --- /dev/null +++ b/tools/perf/pmu-events/metric.py @@ -0,0 +1,502 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +"""Parse or generate representations of perf metrics.""" +import ast +import decimal +import json +import re +from typing import Dict, List, Optional, Set, Union + + +class Expression: + """Abstract base class of elements in a metric expression.""" + + def ToPerfJson(self) -> str: + """Returns a perf json file encoded representation.""" + raise NotImplementedError() + + def ToPython(self) -> str: + """Returns a python expr parseable representation.""" + raise NotImplementedError() + + def Simplify(self): + """Returns a simplified version of self.""" + raise NotImplementedError() + + def Equals(self, other) -> bool: + """Returns true when two expressions are the same.""" + raise NotImplementedError() + + def __str__(self) -> str: + return self.ToPerfJson() + + def __or__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('|', self, other) + + def __ror__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('|', other, self) + + def __xor__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('^', self, other) + + def __and__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('&', self, other) + + def __lt__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('<', self, other) + + def __gt__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('>', self, other) + + def __add__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('+', self, other) + + def __radd__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('+', other, self) + + def __sub__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('-', self, other) + + def __rsub__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('-', other, self) + + def __mul__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('*', self, other) + + def __rmul__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('*', other, self) + + def __truediv__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('/', self, other) + + def __rtruediv__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('/', other, self) + + def __mod__(self, other: Union[int, float, 'Expression']) -> 'Operator': + return Operator('%', self, other) + + +def _Constify(val: Union[bool, int, float, Expression]) -> Expression: + """Used to ensure that the nodes in the expression tree are all Expression.""" + if isinstance(val, bool): + return Constant(1 if val else 0) + if isinstance(val, (int, float)): + return Constant(val) + return val + + +# Simple lookup for operator precedence, used to avoid unnecessary +# brackets. Precedence matches that of python and the simple expression parser. +_PRECEDENCE = { + '|': 0, + '^': 1, + '&': 2, + '<': 3, + '>': 3, + '+': 4, + '-': 4, + '*': 5, + '/': 5, + '%': 5, +} + + +class Operator(Expression): + """Represents a binary operator in the parse tree.""" + + def __init__(self, operator: str, lhs: Union[int, float, Expression], + rhs: Union[int, float, Expression]): + self.operator = operator + self.lhs = _Constify(lhs) + self.rhs = _Constify(rhs) + + def Bracket(self, + other: Expression, + other_str: str, + rhs: bool = False) -> str: + """If necessary brackets the given other value. + + If ``other`` is an operator then a bracket is necessary when + this/self operator has higher precedence. Consider: '(a + b) * c', + ``other_str`` will be 'a + b'. A bracket is necessary as without + the bracket 'a + b * c' will evaluate 'b * c' first. However, '(a + * b) + c' doesn't need a bracket as 'a * b' will always be + evaluated first. For 'a / (b * c)' (ie the same precedence level + operations) then we add the bracket to best match the original + input, but not for '(a / b) * c' where the bracket is unnecessary. + + Args: + other (Expression): is a lhs or rhs operator + other_str (str): ``other`` in the appropriate string form + rhs (bool): is ``other`` on the RHS + + Returns: + str: possibly bracketed other_str + """ + if isinstance(other, Operator): + if _PRECEDENCE.get(self.operator, -1) > _PRECEDENCE.get( + other.operator, -1): + return f'({other_str})' + if rhs and _PRECEDENCE.get(self.operator, -1) == _PRECEDENCE.get( + other.operator, -1): + return f'({other_str})' + return other_str + + def ToPerfJson(self): + return (f'{self.Bracket(self.lhs, self.lhs.ToPerfJson())} {self.operator} ' + f'{self.Bracket(self.rhs, self.rhs.ToPerfJson(), True)}') + + def ToPython(self): + return (f'{self.Bracket(self.lhs, self.lhs.ToPython())} {self.operator} ' + f'{self.Bracket(self.rhs, self.rhs.ToPython(), True)}') + + def Simplify(self) -> Expression: + lhs = self.lhs.Simplify() + rhs = self.rhs.Simplify() + if isinstance(lhs, Constant) and isinstance(rhs, Constant): + return Constant(ast.literal_eval(lhs + self.operator + rhs)) + + if isinstance(self.lhs, Constant): + if self.operator in ('+', '|') and lhs.value == '0': + return rhs + + # Simplify multiplication by 0 except for the slot event which + # is deliberately introduced using this pattern. + if self.operator == '*' and lhs.value == '0' and ( + not isinstance(rhs, Event) or 'slots' not in rhs.name.lower()): + return Constant(0) + + if self.operator == '*' and lhs.value == '1': + return rhs + + if isinstance(rhs, Constant): + if self.operator in ('+', '|') and rhs.value == '0': + return lhs + + if self.operator == '*' and rhs.value == '0': + return Constant(0) + + if self.operator == '*' and self.rhs.value == '1': + return lhs + + return Operator(self.operator, lhs, rhs) + + def Equals(self, other: Expression) -> bool: + if isinstance(other, Operator): + return self.operator == other.operator and self.lhs.Equals( + other.lhs) and self.rhs.Equals(other.rhs) + return False + + +class Select(Expression): + """Represents a select ternary in the parse tree.""" + + def __init__(self, true_val: Union[int, float, Expression], + cond: Union[int, float, Expression], + false_val: Union[int, float, Expression]): + self.true_val = _Constify(true_val) + self.cond = _Constify(cond) + self.false_val = _Constify(false_val) + + def ToPerfJson(self): + true_str = self.true_val.ToPerfJson() + cond_str = self.cond.ToPerfJson() + false_str = self.false_val.ToPerfJson() + return f'({true_str} if {cond_str} else {false_str})' + + def ToPython(self): + return (f'Select({self.true_val.ToPython()}, {self.cond.ToPython()}, ' + f'{self.false_val.ToPython()})') + + def Simplify(self) -> Expression: + cond = self.cond.Simplify() + true_val = self.true_val.Simplify() + false_val = self.false_val.Simplify() + if isinstance(cond, Constant): + return false_val if cond.value == '0' else true_val + + if true_val.Equals(false_val): + return true_val + + return Select(true_val, cond, false_val) + + def Equals(self, other: Expression) -> bool: + if isinstance(other, Select): + return self.cond.Equals(other.cond) and self.false_val.Equals( + other.false_val) and self.true_val.Equals(other.true_val) + return False + + +class Function(Expression): + """A function in an expression like min, max, d_ratio.""" + + def __init__(self, + fn: str, + lhs: Union[int, float, Expression], + rhs: Optional[Union[int, float, Expression]] = None): + self.fn = fn + self.lhs = _Constify(lhs) + self.rhs = _Constify(rhs) + + def ToPerfJson(self): + if self.rhs: + return f'{self.fn}({self.lhs.ToPerfJson()}, {self.rhs.ToPerfJson()})' + return f'{self.fn}({self.lhs.ToPerfJson()})' + + def ToPython(self): + if self.rhs: + return f'{self.fn}({self.lhs.ToPython()}, {self.rhs.ToPython()})' + return f'{self.fn}({self.lhs.ToPython()})' + + def Simplify(self) -> Expression: + lhs = self.lhs.Simplify() + rhs = self.rhs.Simplify() if self.rhs else None + if isinstance(lhs, Constant) and isinstance(rhs, Constant): + if self.fn == 'd_ratio': + if rhs.value == '0': + return Constant(0) + Constant(ast.literal_eval(f'{lhs} / {rhs}')) + return Constant(ast.literal_eval(f'{self.fn}({lhs}, {rhs})')) + + return Function(self.fn, lhs, rhs) + + def Equals(self, other: Expression) -> bool: + if isinstance(other, Function): + return self.fn == other.fn and self.lhs.Equals( + other.lhs) and self.rhs.Equals(other.rhs) + return False + + +def _FixEscapes(s: str) -> str: + s = re.sub(r'([^\\]),', r'\1\\,', s) + return re.sub(r'([^\\])=', r'\1\\=', s) + + +class Event(Expression): + """An event in an expression.""" + + def __init__(self, name: str, legacy_name: str = ''): + self.name = _FixEscapes(name) + self.legacy_name = _FixEscapes(legacy_name) + + def ToPerfJson(self): + result = re.sub('/', '@', self.name) + return result + + def ToPython(self): + return f'Event(r"{self.name}")' + + def Simplify(self) -> Expression: + return self + + def Equals(self, other: Expression) -> bool: + return isinstance(other, Event) and self.name == other.name + + +class Constant(Expression): + """A constant within the expression tree.""" + + def __init__(self, value: Union[float, str]): + ctx = decimal.Context() + ctx.prec = 20 + dec = ctx.create_decimal(repr(value) if isinstance(value, float) else value) + self.value = dec.normalize().to_eng_string() + self.value = self.value.replace('+', '') + self.value = self.value.replace('E', 'e') + + def ToPerfJson(self): + return self.value + + def ToPython(self): + return f'Constant({self.value})' + + def Simplify(self) -> Expression: + return self + + def Equals(self, other: Expression) -> bool: + return isinstance(other, Constant) and self.value == other.value + + +class Literal(Expression): + """A runtime literal within the expression tree.""" + + def __init__(self, value: str): + self.value = value + + def ToPerfJson(self): + return self.value + + def ToPython(self): + return f'Literal({self.value})' + + def Simplify(self) -> Expression: + return self + + def Equals(self, other: Expression) -> bool: + return isinstance(other, Literal) and self.value == other.value + + +def min(lhs: Union[int, float, Expression], rhs: Union[int, float, + Expression]) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('min', lhs, rhs) + + +def max(lhs: Union[int, float, Expression], rhs: Union[int, float, + Expression]) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('max', lhs, rhs) + + +def d_ratio(lhs: Union[int, float, Expression], + rhs: Union[int, float, Expression]) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('d_ratio', lhs, rhs) + + +def source_count(event: Event) -> Function: + # pylint: disable=redefined-builtin + # pylint: disable=invalid-name + return Function('source_count', event) + + +class Metric: + """An individual metric that will specifiable on the perf command line.""" + groups: Set[str] + expr: Expression + scale_unit: str + constraint: bool + + def __init__(self, + name: str, + description: str, + expr: Expression, + scale_unit: str, + constraint: bool = False): + self.name = name + self.description = description + self.expr = expr.Simplify() + # Workraound valid_only_metric hiding certain metrics based on unit. + scale_unit = scale_unit.replace('/sec', ' per sec') + if scale_unit[0].isdigit(): + self.scale_unit = scale_unit + else: + self.scale_unit = f'1{scale_unit}' + self.constraint = constraint + self.groups = set() + + def __lt__(self, other): + """Sort order.""" + return self.name < other.name + + def AddToMetricGroup(self, group): + """Callback used when being added to a MetricGroup.""" + self.groups.add(group.name) + + def Flatten(self) -> Set['Metric']: + """Return a leaf metric.""" + return set([self]) + + def ToPerfJson(self) -> Dict[str, str]: + """Return as dictionary for Json generation.""" + result = { + 'MetricName': self.name, + 'MetricGroup': ';'.join(sorted(self.groups)), + 'BriefDescription': self.description, + 'MetricExpr': self.expr.ToPerfJson(), + 'ScaleUnit': self.scale_unit + } + if self.constraint: + result['MetricConstraint'] = 'NO_NMI_WATCHDOG' + + return result + + +class _MetricJsonEncoder(json.JSONEncoder): + """Special handling for Metric objects.""" + + def default(self, o): + if isinstance(o, Metric): + return o.ToPerfJson() + return json.JSONEncoder.default(self, o) + + +class MetricGroup: + """A group of metrics. + + Metric groups may be specificd on the perf command line, but within + the json they aren't encoded. Metrics may be in multiple groups + which can facilitate arrangements similar to trees. + """ + + def __init__(self, name: str, metric_list: List[Union[Metric, + 'MetricGroup']]): + self.name = name + self.metric_list = metric_list + for metric in metric_list: + metric.AddToMetricGroup(self) + + def AddToMetricGroup(self, group): + """Callback used when a MetricGroup is added into another.""" + for metric in self.metric_list: + metric.AddToMetricGroup(group) + + def Flatten(self) -> Set[Metric]: + """Returns a set of all leaf metrics.""" + result = set() + for x in self.metric_list: + result = result.union(x.Flatten()) + + return result + + def ToPerfJson(self) -> str: + return json.dumps(sorted(self.Flatten()), indent=2, cls=_MetricJsonEncoder) + + def __str__(self) -> str: + return self.ToPerfJson() + + +class _RewriteIfExpToSelect(ast.NodeTransformer): + + def visit_IfExp(self, node): + # pylint: disable=invalid-name + self.generic_visit(node) + call = ast.Call( + func=ast.Name(id='Select', ctx=ast.Load()), + args=[node.body, node.test, node.orelse], + keywords=[]) + ast.copy_location(call, node.test) + return call + + +def ParsePerfJson(orig: str) -> Expression: + """A simple json metric expression decoder. + + Converts a json encoded metric expression by way of python's ast and + eval routine. First tokens are mapped to Event calls, then + accidentally converted keywords or literals are mapped to their + appropriate calls. Python's ast is used to match if-else that can't + be handled via operator overloading. Finally the ast is evaluated. + + Args: + orig (str): String to parse. + + Returns: + Expression: The parsed string. + """ + # pylint: disable=eval-used + py = orig.strip() + py = re.sub(r'([a-zA-Z][^-+/\* \\\(\),]*(?:\\.[^-+/\* \\\(\),]*)*)', + r'Event(r"\1")', py) + py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py) + py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py) + keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count'] + for kw in keywords: + py = re.sub(rf'Event\(r"{kw}"\)', kw, py) + + parsed = ast.parse(py, mode='eval') + _RewriteIfExpToSelect().visit(parsed) + parsed = ast.fix_missing_locations(parsed) + return _Constify(eval(compile(parsed, orig, 'eval'))) diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py new file mode 100644 index 000000000000..15315d0f716c --- /dev/null +++ b/tools/perf/pmu-events/metric_test.py @@ -0,0 +1,157 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +import unittest +from metric import Constant +from metric import Event +from metric import ParsePerfJson + + +class TestMetricExpressions(unittest.TestCase): + + def test_Operators(self): + a = Event('a') + b = Event('b') + self.assertEqual((a | b).ToPerfJson(), 'a | b') + self.assertEqual((a ^ b).ToPerfJson(), 'a ^ b') + self.assertEqual((a & b).ToPerfJson(), 'a & b') + self.assertEqual((a < b).ToPerfJson(), 'a < b') + self.assertEqual((a > b).ToPerfJson(), 'a > b') + self.assertEqual((a + b).ToPerfJson(), 'a + b') + self.assertEqual((a - b).ToPerfJson(), 'a - b') + self.assertEqual((a * b).ToPerfJson(), 'a * b') + self.assertEqual((a / b).ToPerfJson(), 'a / b') + self.assertEqual((a % b).ToPerfJson(), 'a % b') + one = Constant(1) + self.assertEqual((a + one).ToPerfJson(), 'a + 1') + + def test_Brackets(self): + a = Event('a') + b = Event('b') + c = Event('c') + self.assertEqual((a * b + c).ToPerfJson(), 'a * b + c') + self.assertEqual((a + b * c).ToPerfJson(), 'a + b * c') + self.assertEqual(((a + a) + a).ToPerfJson(), 'a + a + a') + self.assertEqual(((a + b) * c).ToPerfJson(), '(a + b) * c') + self.assertEqual((a + (b * c)).ToPerfJson(), 'a + b * c') + self.assertEqual(((a / b) * c).ToPerfJson(), 'a / b * c') + self.assertEqual((a / (b * c)).ToPerfJson(), 'a / (b * c)') + + def test_ParsePerfJson(self): + # Based on an example of a real metric. + before = '(a + b + c + d) / (2 * e)' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Parsing should handle events with '-' in their name. Note, in + # the json file the '\' are doubled to '\\'. + before = r'topdown\-fe\-bound / topdown\-slots - 1' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Parsing should handle escaped modifiers. Note, in the json file + # the '\' are doubled to '\\'. + before = r'arb@event\=0x81\,umask\=0x1@ + arb@event\=0x84\,umask\=0x1@' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Parsing should handle exponents in numbers. + before = r'a + 1e12 + b' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + def test_IfElseTests(self): + # if-else needs rewriting to Select and back. + before = r'Event1 if #smt_on else Event2' + after = f'({before})' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + before = r'Event1 if 0 else Event2' + after = f'({before})' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + before = r'Event1 if 1 else Event2' + after = f'({before})' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + # Ensure the select is evaluate last. + before = r'Event1 + 1 if Event2 < 2 else Event3 + 3' + after = (r'Select(Event(r"Event1") + Constant(1), Event(r"Event2") < ' + r'Constant(2), Event(r"Event3") + Constant(3))') + self.assertEqual(ParsePerfJson(before).ToPython(), after) + + before = r'Event1 > 1 if Event2 < 2 else Event3 > 3' + after = (r'Select(Event(r"Event1") > Constant(1), Event(r"Event2") < ' + r'Constant(2), Event(r"Event3") > Constant(3))') + self.assertEqual(ParsePerfJson(before).ToPython(), after) + + before = r'min(a + b if c > 1 else c + d, e + f)' + after = r'min((a + b if c > 1 else c + d), e + f)' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + before =3D r'a if b else c if d else e' + after =3D r'(a if b else (c if d else e))' + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + + def test_ToPython(self): + # pylint: disable=eval-used + # Based on an example of a real metric. + before = '(a + b + c + d) / (2 * e)' + py = ParsePerfJson(before).ToPython() + after = eval(py).ToPerfJson() + self.assertEqual(before, after) + + def test_Simplify(self): + before = '1 + 2 + 3' + after = '6' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a + 0' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '0 + a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a | 0' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '0 | a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a * 0' + after = '0' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '0 * a' + after = '0' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a * 1' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = '1 * a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a if 0 else b' + after = 'b' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a if 1 else b' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + before = 'a if b else a' + after = 'a' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + + # Pattern used to add a slots event to metrics that require it. + before = '0 * SLOTS' + after = '0 * SLOTS' + self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + +if __name__ == '__main__': + unittest.main() -- cgit v1.2.3 From 378ef0f5d9d7f4652d7a40e0711e8b845ada1cbd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 5 Dec 2022 14:59:39 -0800 Subject: perf build: Use libtraceevent from the system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the LIBTRACEEVENT_DYNAMIC and LIBTRACEFS_DYNAMIC make command line variables. If libtraceevent isn't installed or NO_LIBTRACEEVENT=1 is passed to the build, don't compile in libtraceevent and libtracefs support. This also disables CONFIG_TRACE that controls "perf trace". CONFIG_LIBTRACEEVENT is used to control enablement in Build/Makefiles, HAVE_LIBTRACEEVENT is used in C code. Without HAVE_LIBTRACEEVENT tracepoints are disabled and as such the commands kmem, kwork, lock, sched and timechart are removed. The majority of commands continue to work including "perf test". Committer notes: Fixed up a tools/perf/util/Build reject and added: #include to tools/perf/util/scripting-engines/trace-event-perl.c. Committer testing: $ rpm -qi libtraceevent-devel Name : libtraceevent-devel Version : 1.5.3 Release : 2.fc36 Architecture: x86_64 Install Date: Mon 25 Jul 2022 03:20:19 PM -03 Group : Unspecified Size : 27728 License : LGPLv2+ and GPLv2+ Signature : RSA/SHA256, Fri 15 Apr 2022 02:11:58 PM -03, Key ID 999f7cbf38ab71f4 Source RPM : libtraceevent-1.5.3-2.fc36.src.rpm Build Date : Fri 15 Apr 2022 10:57:01 AM -03 Build Host : buildvm-x86-05.iad2.fedoraproject.org Packager : Fedora Project Vendor : Fedora Project URL : https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ Bug URL : https://bugz.fedoraproject.org/libtraceevent Summary : Development headers of libtraceevent Description : Development headers of libtraceevent-libs $ Default build: $ ldd ~/bin/perf | grep tracee libtraceevent.so.1 => /lib64/libtraceevent.so.1 (0x00007f1dcaf8f000) $ # perf trace -e sched:* --max-events 10 0.000 migration/0/17 sched:sched_migrate_task(comm: "", pid: 1603763 (perf), prio: 120, dest_cpu: 1) 0.005 migration/0/17 sched:sched_wake_idle_without_ipi(cpu: 1) 0.011 migration/0/17 sched:sched_switch(prev_comm: "", prev_pid: 17 (migration/0), prev_state: 1, next_comm: "", next_prio: 120) 1.173 :0/0 sched:sched_wakeup(comm: "", pid: 3138 (gnome-terminal-), prio: 120) 1.180 :0/0 sched:sched_switch(prev_comm: "", prev_prio: 120, next_comm: "", next_pid: 3138 (gnome-terminal-), next_prio: 120) 0.156 migration/1/21 sched:sched_migrate_task(comm: "", pid: 1603763 (perf), prio: 120, orig_cpu: 1, dest_cpu: 2) 0.160 migration/1/21 sched:sched_wake_idle_without_ipi(cpu: 2) 0.166 migration/1/21 sched:sched_switch(prev_comm: "", prev_pid: 21 (migration/1), prev_state: 1, next_comm: "", next_prio: 120) 1.183 :0/0 sched:sched_wakeup(comm: "", pid: 1602985 (kworker/u16:0-f), prio: 120, target_cpu: 1) 1.186 :0/0 sched:sched_switch(prev_comm: "", prev_prio: 120, next_comm: "", next_pid: 1602985 (kworker/u16:0-f), next_prio: 120) # Had to tweak tools/perf/util/setup.py to make sure the python binding shared object links with libtraceevent if -DHAVE_LIBTRACEEVENT is present in CFLAGS. Building with NO_LIBTRACEEVENT=1 uncovered some more build failures: - Make building of data-convert-bt.c to CONFIG_LIBTRACEEVENT=y - perf-$(CONFIG_LIBTRACEEVENT) += scripts/ - bpf_kwork.o needs also to be dependent on CONFIG_LIBTRACEEVENT=y - The python binding needed some fixups and util/trace-event.c can't be built and linked with the python binding shared object, so remove it in tools/perf/util/setup.py and exclude it from the list of dependencies in the python/perf.so Makefile.perf target. Building without libtraceevent-devel installed uncovered more build failures: - The python binding tools/perf/util/python.c was assuming that traceevent/parse-events.h was always available, which was the case when we defaulted to using the in-kernel tools/lib/traceevent/ files, now we need to enclose it under ifdef HAVE_LIBTRACEEVENT, just like the other parts of it that deal with tracepoints. - We have to ifdef the rules in the Build files with CONFIG_LIBTRACEEVENT=y to build builtin-trace.c and tools/perf/trace/beauty/ as we only ifdef setting CONFIG_TRACE=y when setting NO_LIBTRACEEVENT=1 in the make command line, not when we don't detect libtraceevent-devel installed in the system. Simplification here to avoid these two ways of disabling builtin-trace.c and not having CONFIG_TRACE=y when libtraceevent-devel isn't installed is the clean way. From Athira: tools/perf/arch/powerpc/util/Build -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o Then, ditto for arm64 and s390, detected by container cross build tests. - s/390 uses test__checkevent_tracepoint() that is now only available if HAVE_LIBTRACEEVENT is defined, enclose the callsite with ifder HAVE_LIBTRACEEVENT. Also from Athira: With this change, I could successfully compile in these environment: - Without libtraceevent-devel installed - With libtraceevent-devel installed - With “make NO_LIBTRACEEVENT=1” Then, finally rename CONFIG_TRACEEVENT to CONFIG_LIBTRACEEVENT for consistency with other libraries detected in tools/perf/. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Athira Rajeev Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20221205225940.3079667-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 20 ++-- tools/perf/Makefile.config | 37 ++++---- tools/perf/Makefile.perf | 104 ++------------------- tools/perf/arch/arm64/util/Build | 2 +- tools/perf/arch/powerpc/util/Build | 2 +- tools/perf/arch/s390/util/Build | 2 +- tools/perf/arch/x86/util/Build | 2 +- tools/perf/arch/x86/util/intel-pt.c | 4 + tools/perf/builtin-annotate.c | 2 + tools/perf/builtin-data.c | 5 +- tools/perf/builtin-inject.c | 8 ++ tools/perf/builtin-kmem.c | 1 + tools/perf/builtin-kvm.c | 12 +-- tools/perf/builtin-kwork.c | 1 + tools/perf/builtin-record.c | 2 + tools/perf/builtin-report.c | 9 +- tools/perf/builtin-script.c | 19 +++- tools/perf/builtin-timechart.c | 1 + tools/perf/builtin-trace.c | 5 +- tools/perf/builtin-version.c | 1 + tools/perf/perf.c | 24 +++-- tools/perf/scripts/python/Perf-Trace-Util/Build | 2 +- tools/perf/tests/Build | 12 +-- tools/perf/tests/builtin-test.c | 6 ++ tools/perf/tests/parse-events.c | 23 ++++- tools/perf/util/Build | 21 +++-- tools/perf/util/data-convert-bt.c | 5 +- tools/perf/util/data-convert-json.c | 9 +- tools/perf/util/evlist.c | 6 +- tools/perf/util/evlist.h | 4 + tools/perf/util/evsel.c | 11 ++- tools/perf/util/evsel.h | 12 ++- tools/perf/util/evsel_fprintf.c | 7 +- tools/perf/util/header.c | 19 ++++ tools/perf/util/header.h | 2 + tools/perf/util/intel-pt.c | 7 +- tools/perf/util/parse-events.c | 15 +++ tools/perf/util/parse-events.h | 1 - tools/perf/util/python.c | 10 ++ tools/perf/util/scripting-engines/Build | 6 +- .../perf/util/scripting-engines/trace-event-perl.c | 1 + .../util/scripting-engines/trace-event-python.c | 1 + tools/perf/util/session.c | 2 + tools/perf/util/session.h | 2 + tools/perf/util/setup.py | 10 +- tools/perf/util/sort.c | 60 ++++++++++-- tools/perf/util/synthetic-events.c | 6 ++ tools/perf/util/trace-event-parse.c | 2 + tools/perf/util/trace-event-read.c | 1 + tools/perf/util/trace-event-scripting.c | 1 + tools/perf/util/trace-event.c | 1 - tools/perf/util/trace-event.h | 11 ++- 52 files changed, 355 insertions(+), 184 deletions(-) diff --git a/tools/perf/Build b/tools/perf/Build index 496b096153bb..6dd67e502295 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -5,7 +5,6 @@ perf-y += builtin-diff.o perf-y += builtin-evlist.o perf-y += builtin-ftrace.o perf-y += builtin-help.o -perf-y += builtin-sched.o perf-y += builtin-buildid-list.o perf-y += builtin-buildid-cache.o perf-y += builtin-kallsyms.o @@ -13,11 +12,8 @@ perf-y += builtin-list.o perf-y += builtin-record.o perf-y += builtin-report.o perf-y += builtin-stat.o -perf-y += builtin-timechart.o perf-y += builtin-top.o perf-y += builtin-script.o -perf-y += builtin-kmem.o -perf-y += builtin-lock.o perf-y += builtin-kvm.o perf-y += builtin-inject.o perf-y += builtin-mem.o @@ -25,9 +21,18 @@ perf-y += builtin-data.o perf-y += builtin-version.o perf-y += builtin-c2c.o perf-y += builtin-daemon.o -perf-y += builtin-kwork.o -perf-$(CONFIG_TRACE) += builtin-trace.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-kmem.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-kwork.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-lock.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-sched.o +perf-$(CONFIG_LIBTRACEEVENT) += builtin-timechart.o + +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_TRACE) += builtin-trace.o + perf-$(CONFIG_TRACE) += trace/beauty/ +endif + perf-$(CONFIG_LIBELF) += builtin-probe.o perf-y += bench/ @@ -51,7 +56,6 @@ CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" perf-y += util/ perf-y += arch/ perf-y += ui/ -perf-y += scripts/ -perf-$(CONFIG_TRACE) += trace/beauty/ +perf-$(CONFIG_LIBTRACEEVENT) += scripts/ gtk-y += ui/gtk/ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 9cc3c48f3288..680228e19c1a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -763,18 +763,20 @@ ifndef NO_LIBUNWIND EXTLIBS += $(EXTLIBS_LIBUNWIND) endif -ifeq ($(NO_SYSCALL_TABLE),0) - $(call detected,CONFIG_TRACE) -else - ifndef NO_LIBAUDIT - $(call feature_check,libaudit) - ifneq ($(feature-libaudit), 1) - msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); - NO_LIBAUDIT := 1 - else - CFLAGS += -DHAVE_LIBAUDIT_SUPPORT - EXTLIBS += -laudit - $(call detected,CONFIG_TRACE) +ifneq ($(NO_LIBTRACEEVENT),1) + ifeq ($(NO_SYSCALL_TABLE),0) + $(call detected,CONFIG_TRACE) + else + ifndef NO_LIBAUDIT + $(call feature_check,libaudit) + ifneq ($(feature-libaudit), 1) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + NO_LIBAUDIT := 1 + else + CFLAGS += -DHAVE_LIBAUDIT_SUPPORT + EXTLIBS += -laudit + $(call detected,CONFIG_TRACE) + endif endif endif endif @@ -1182,9 +1184,11 @@ ifdef LIBPFM4 endif endif -ifdef LIBTRACEEVENT_DYNAMIC +# libtraceevent is a recommended dependency picked up from the system. +ifneq ($(NO_LIBTRACEEVENT),1) $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) + CFLAGS += -DHAVE_LIBTRACEEVENT EXTLIBS += -ltraceevent LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent) LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION))) @@ -1192,12 +1196,11 @@ ifdef LIBTRACEEVENT_DYNAMIC LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION))) LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3)) CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP) + $(call detected,CONFIG_LIBTRACEEVENT) else - dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); + dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel) endif -endif -ifdef LIBTRACEFS_DYNAMIC $(call feature_check,libtracefs) ifeq ($(feature-libtracefs), 1) EXTLIBS += -ltracefs @@ -1207,8 +1210,6 @@ ifdef LIBTRACEFS_DYNAMIC LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION))) LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3)) CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP) - else - dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev); endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6689f644782f..98f629bbd1aa 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -128,10 +128,6 @@ include ../scripts/utilities.mak # # Define BUILD_BPF_SKEL to enable BPF skeletons # -# Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking -# -# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking -# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -242,10 +238,6 @@ sub-make: fixdep else # force_fixdep LIBAPI_DIR = $(srctree)/tools/lib/api/ -ifndef LIBTRACEEVENT_DYNAMIC -LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ -LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins -endif LIBBPF_DIR = $(srctree)/tools/lib/bpf/ LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ @@ -295,31 +287,6 @@ SCRIPT_SH += perf-iostat.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -ifndef LIBTRACEEVENT_DYNAMIC -ifneq ($(OUTPUT),) - LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent -else - LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent -endif -LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins -LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT) -LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT) -LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include -LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a -export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list -CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include -# -# The static build has no dynsym table, so this does not work for -# static build. Looks like linker starts to scream about that now -# (in Fedora 26) so we need to switch it off for static build. -DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) -else -LIBTRACEEVENT_DYNAMIC_LIST = -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -endif - ifneq ($(OUTPUT),) LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi else @@ -380,13 +347,14 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so -PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -ifndef LIBTRACEEVENT_DYNAMIC -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) +ifeq ($(CONFIG_LIBTRACEEVENT),y) + PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) else -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI) + PYTHON_EXT_SRCS := $(shell grep -v '^\#\|util/trace-event.c' util/python-ext-sources) endif +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI) + SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) PROGRAMS += $(OUTPUT)perf @@ -430,9 +398,6 @@ ifndef NO_LIBBPF PERFLIBS += $(LIBBPF) endif endif -ifndef LIBTRACEEVENT_DYNAMIC - PERFLIBS += $(LIBTRACEEVENT) -endif # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -682,9 +647,9 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) # Create python binding output directory if not already present _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python') -$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF) +$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBPERF) $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \ - CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ + CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS)' \ $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/ @@ -710,8 +675,8 @@ $(PERF_IN): prepare FORCE $(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ $(GTK_IN): FORCE prepare @@ -797,10 +762,6 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(LIBSYMBOL) \ bpf-skel -ifndef LIBTRACEEVENT_DYNAMIC -prepare: $(LIBTRACEEVENT) -endif - $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -856,38 +817,6 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -ifndef LIBTRACEEVENT_DYNAMIC -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' - -$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) - $(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \ - $@ install_headers - -$(LIBTRACEEVENT)-clean: - $(call QUIET_CLEAN, libtraceevent) - $(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT) - -libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT) - $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ - plugins - -libtraceevent_plugins-clean: - $(call QUIET_CLEAN, libtraceevent_plugins) - $(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT) - -$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ - $(LIBTRACEEVENT_FLAGS) $@ - -install-traceevent-plugins: libtraceevent_plugins - $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ - DESTDIR=$(DESTDIR_SQ)$(prefix) prefix= \ - $(LIBTRACEEVENT_FLAGS) install -endif - $(LIBAPI): FORCE | $(LIBAPI_OUTPUT) $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ DESTDIR=$(LIBAPI_DESTDIR) prefix= \ @@ -1095,10 +1024,6 @@ install-tests: all install-gtk install-bin: install-tools install-tests -ifndef LIBTRACEEVENT_DYNAMIC -install-bin: install-traceevent-plugins -endif - install: install-bin try-install-man install-python_ext: @@ -1124,11 +1049,6 @@ SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ -ifndef LIBTRACEEVENT_DYNAMIC -$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT): - $(Q)$(MKDIR) -p $@ -endif - ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) @@ -1211,10 +1131,6 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $( $(call QUIET_CLEAN, Documentation) \ $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null -ifndef LIBTRACEEVENT_DYNAMIC -clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean -endif - # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) # file if defined, with no further action. @@ -1232,6 +1148,6 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope FORCE prepare -.PHONY: libtraceevent_plugins archheaders +.PHONY: archheaders endif # force_fixdep diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 337aa9bdf905..78ef7115be3d 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -3,7 +3,7 @@ perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o perf-y += pmu.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 0115f3166568..9889245c555c 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,5 @@ perf-y += header.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o perf-y += sym-handling.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 3d9d0f4f72ca..db6884086997 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,5 +1,5 @@ perf-y += header.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index dbeb04cb336e..195ccfdef7aa 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,7 +1,7 @@ perf-y += header.o perf-y += tsc.o perf-y += pmu.o -perf-y += kvm-stat.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-y += perf_regs.o perf-y += topdown.o perf-y += machine.o diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index af102f471e9f..1e39a034cee9 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -418,6 +418,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, return 0; } +#ifdef HAVE_LIBTRACEEVENT static int intel_pt_track_switches(struct evlist *evlist) { const char *sched_switch = "sched:sched_switch"; @@ -439,6 +440,7 @@ static int intel_pt_track_switches(struct evlist *evlist) return 0; } +#endif static void intel_pt_valid_str(char *str, size_t len, u64 valid) { @@ -829,6 +831,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, ptr->have_sched_switch = 2; } } else { +#ifdef HAVE_LIBTRACEEVENT err = intel_pt_track_switches(evlist); if (err == -EPERM) pr_debug2("Unable to select sched:sched_switch\n"); @@ -836,6 +839,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, return err; else ptr->have_sched_switch = 1; +#endif } } diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 517d928c00e3..90458ca6933f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -499,7 +499,9 @@ int cmd_annotate(int argc, const char **argv) .namespaces = perf_event__process_namespaces, .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index c22d82d2a73c..b2a9a3b7f68d 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -78,12 +78,13 @@ static int cmd_data_convert(int argc, const char **argv) return bt_convert__perf2json(input_name, to_json, &opts); if (to_ctf) { -#ifdef HAVE_LIBBABELTRACE_SUPPORT +#if defined(HAVE_LIBBABELTRACE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) return bt_convert__perf2ctf(input_name, to_ctf, &opts); #else pr_err("The libbabeltrace support is not compiled in. perf should be " "compiled with environment variables LIBBABELTRACE=1 and " - "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); + "LIBBABELTRACE_DIR=/path/to/libbabeltrace/.\n" + "Check also if libbtraceevent devel files are available.\n"); return -1; #endif } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index e254f18986f7..3f4e4dd5abf3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -607,6 +607,7 @@ static int perf_event__repipe_exit(struct perf_tool *tool, return err; } +#ifdef HAVE_LIBTRACEEVENT static int perf_event__repipe_tracing_data(struct perf_session *session, union perf_event *event) { @@ -614,6 +615,7 @@ static int perf_event__repipe_tracing_data(struct perf_session *session, return perf_event__process_tracing_data(session, event); } +#endif static int dso__read_build_id(struct dso *dso) { @@ -807,6 +809,7 @@ static int perf_inject__sched_switch(struct perf_tool *tool, return 0; } +#ifdef HAVE_LIBTRACEEVENT static int perf_inject__sched_stat(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample, @@ -836,6 +839,7 @@ found: build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); return perf_event__repipe(tool, event_sw, &sample_sw, machine); } +#endif static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) { @@ -1961,7 +1965,9 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.mmap = perf_event__repipe_mmap; inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; +#ifdef HAVE_LIBTRACEEVENT inject->tool.tracing_data = perf_event__repipe_tracing_data; +#endif } output_data_offset = perf_session__data_offset(session->evlist); @@ -1984,8 +1990,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->handler = perf_inject__sched_switch; } else if (!strcmp(name, "sched:sched_process_exit")) evsel->handler = perf_inject__sched_process_exit; +#ifdef HAVE_LIBTRACEEVENT else if (!strncmp(name, "sched:sched_stat_", 17)) evsel->handler = perf_inject__sched_stat; +#endif } } else if (inject->itrace_synth_opts.vm_time_correlation) { session->itrace_synth_opts = &inject->itrace_synth_opts; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ebfab2ca1702..e20656c431a4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -35,6 +35,7 @@ #include #include +#include static int kmem_slab; static int kmem_page; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 7d9ec1bac1a2..641e739c717c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -63,7 +63,7 @@ static const char *get_filename_for_perf_kvm(void) return filename; } -#ifdef HAVE_KVM_STAT_SUPPORT +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) void exit_event_get_key(struct evsel *evsel, struct perf_sample *sample, @@ -654,7 +654,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events); } -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) static int process_lost_event(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -742,7 +742,7 @@ static bool verify_vcpu(int vcpu) return true; } -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) /* keeping the max events to a modest level to keep * the processing of samples per mmap smooth. */ @@ -1290,7 +1290,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) return kvm_events_report_vcpu(kvm); } -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) static struct evlist *kvm_live_event_list(void) { struct evlist *evlist; @@ -1507,7 +1507,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) if (strlen(argv[1]) > 2 && strstarts("report", argv[1])) return kvm_events_report(&kvm, argc - 1 , argv + 1); -#ifdef HAVE_TIMERFD_SUPPORT +#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT) if (!strncmp(argv[1], "live", 4)) return kvm_events_live(&kvm, argc - 1 , argv + 1); #endif @@ -1644,7 +1644,7 @@ int cmd_kvm(int argc, const char **argv) return cmd_top(argc, argv); else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0])) return __cmd_buildid_list(file_name, argc, argv); -#ifdef HAVE_KVM_STAT_SUPPORT +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) else if (strlen(argv[0]) > 2 && strstarts("stat", argv[0])) return kvm_cmd_stat(file_name, argc, argv); #endif diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index 0e02b8098644..dc59d75180d1 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -23,6 +23,7 @@ #include #include +#include #include #include diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b7fd7ec586fb..7e17374f6c1a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1701,8 +1701,10 @@ static void record__init_features(struct record *rec) if (rec->no_buildid) perf_header__clear_feat(&session->header, HEADER_BUILD_ID); +#ifdef HAVE_LIBTRACEEVENT if (!have_tracepoints(&rec->evlist->core.entries)) perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); +#endif if (!rec->opts.branch_stack) perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b6d77d3da64f..2ee2ecca208e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -67,6 +67,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + struct report { struct perf_tool tool; struct perf_session *session; @@ -1199,7 +1203,9 @@ int cmd_report(int argc, const char **argv) .lost = perf_event__process_lost, .read = process_read_event, .attr = process_attr, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, @@ -1660,6 +1666,7 @@ repeat: report.range_num); } +#ifdef HAVE_LIBTRACEEVENT if (session->tevent.pevent && tep_set_function_resolver(session->tevent.pevent, machine__resolve_kernel_addr, @@ -1668,7 +1675,7 @@ repeat: __func__); return -1; } - +#endif sort__setup_elide(stdout); ret = __cmd_report(&report); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d7ec8c1af293..88888fb885c8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -62,6 +62,9 @@ #include "perf.h" #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif static char const *script_name; static char const *generate_script_lang; @@ -2154,12 +2157,12 @@ static void process_event(struct perf_script *script, perf_sample__fprintf_bts(sample, evsel, thread, al, addr_al, machine, fp); return; } - +#ifdef HAVE_LIBTRACEEVENT if (PRINT_FIELD(TRACE) && sample->raw_data) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, fp); } - +#endif if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH)) perf_sample__fprintf_synth(sample, evsel, fp); @@ -2283,8 +2286,10 @@ static void process_stat_interval(u64 tstamp) static void setup_scripting(void) { +#ifdef HAVE_LIBTRACEEVENT setup_perl_scripting(); setup_python_scripting(); +#endif } static int flush_scripting(void) @@ -3784,7 +3789,9 @@ int cmd_script(int argc, const char **argv) .fork = perf_event__process_fork, .attr = process_attr, .event_update = perf_event__process_event_update, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .feature = process_feature_event, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, @@ -4215,6 +4222,7 @@ script_found: else symbol_conf.use_callchain = false; +#ifdef HAVE_LIBTRACEEVENT if (session->tevent.pevent && tep_set_function_resolver(session->tevent.pevent, machine__resolve_kernel_addr, @@ -4223,7 +4231,7 @@ script_found: err = -1; goto out_delete; } - +#endif if (generate_script_lang) { struct stat perf_stat; int input; @@ -4259,9 +4267,12 @@ script_found: err = -ENOENT; goto out_delete; } - +#ifdef HAVE_LIBTRACEEVENT err = scripting_ops->generate_script(session->tevent.pevent, "perf-script"); +#else + err = scripting_ops->generate_script(NULL, "perf-script"); +#endif goto out_delete; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index c36296bb7637..6c629e7d370a 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -38,6 +38,7 @@ #include "util/string2.h" #include "util/tracepoint.h" #include +#include #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 543c379d2a57..6909cd9f48d1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -15,7 +15,6 @@ */ #include "util/record.h" -#include #include #include #include "util/bpf_map.h" @@ -80,6 +79,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index a71f491224da..a886929ec6e5 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -82,6 +82,7 @@ static void library_status(void) STATUS(HAVE_AIO_SUPPORT, aio); STATUS(HAVE_ZSTD_SUPPORT, zstd); STATUS(HAVE_LIBPFM, libpfm4); + STATUS(HAVE_LIBTRACEEVENT, libtraceevent); } int cmd_version(int argc, const char **argv) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 7af135dea1cd..82bbe0ca858b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -70,20 +70,26 @@ static struct cmd_struct commands[] = { { "report", cmd_report, 0 }, { "bench", cmd_bench, 0 }, { "stat", cmd_stat, 0 }, +#ifdef HAVE_LIBTRACEEVENT { "timechart", cmd_timechart, 0 }, +#endif { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, { "script", cmd_script, 0 }, +#ifdef HAVE_LIBTRACEEVENT { "sched", cmd_sched, 0 }, +#endif #ifdef HAVE_LIBELF_SUPPORT { "probe", cmd_probe, 0 }, #endif +#ifdef HAVE_LIBTRACEEVENT { "kmem", cmd_kmem, 0 }, { "lock", cmd_lock, 0 }, +#endif { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) +#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -91,7 +97,9 @@ static struct cmd_struct commands[] = { { "data", cmd_data, 0 }, { "ftrace", cmd_ftrace, 0 }, { "daemon", cmd_daemon, 0 }, +#ifdef HAVE_LIBTRACEEVENT { "kwork", cmd_kwork, 0 }, +#endif }; struct pager_config { @@ -500,14 +508,18 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) - setup_path(); - argv[0] = "trace"; - return cmd_trace(argc, argv); -#else +#ifndef HAVE_LIBTRACEEVENT + fprintf(stderr, + "trace command not available: missing libtraceevent devel package at build time.\n"); + goto out; +#elif !defined(HAVE_LIBAUDIT_SUPPORT) && !defined(HAVE_SYSCALL_TABLE_SUPPORT) fprintf(stderr, "trace command not available: missing audit-libs devel package at build time.\n"); goto out; +#else + setup_path(); + argv[0] = "trace"; + return cmd_trace(argc, argv); #endif } /* Look for flags.. */ diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index 7d0e33ce6aba..d5fed4e42617 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -1,3 +1,3 @@ -perf-y += Context.o +perf-$(CONFIG_LIBTRACEEVENT) += Context.o CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 658b5052c24d..90fd1eb317bb 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -6,13 +6,13 @@ perf-y += parse-events.o perf-y += dso-data.o perf-y += attr.o perf-y += vmlinux-kallsyms.o -perf-y += openat-syscall.o -perf-y += openat-syscall-all-cpus.o -perf-y += openat-syscall-tp-fields.o -perf-y += mmap-basic.o +perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o +perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o +perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall-tp-fields.o +perf-$(CONFIG_LIBTRACEEVENT) += mmap-basic.o perf-y += perf-record.o perf-y += evsel-roundtrip-name.o -perf-y += evsel-tp-sched.o +perf-$(CONFIG_LIBTRACEEVENT) += evsel-tp-sched.o perf-y += fdarray.o perf-y += pmu.o perf-y += pmu-events.o @@ -30,7 +30,7 @@ perf-y += task-exit.o perf-y += sw-clock.o perf-y += mmap-thread-lookup.o perf-y += thread-maps-share.o -perf-y += switch-tracking.o +perf-$(CONFIG_LIBTRACEEVENT) += switch-tracking.o perf-y += keep-tracking.o perf-y += code-reading.o perf-y += sample-parsing.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index ddd8262bfa26..f6c16ad8ed50 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -38,9 +38,11 @@ struct test_suite *__weak arch_tests[] = { static struct test_suite *generic_tests[] = { &suite__vmlinux_matches_kallsyms, +#ifdef HAVE_LIBTRACEEVENT &suite__openat_syscall_event, &suite__openat_syscall_event_on_all_cpus, &suite__basic_mmap, +#endif &suite__mem, &suite__parse_events, &suite__expr, @@ -51,8 +53,10 @@ static struct test_suite *generic_tests[] = { &suite__dso_data_cache, &suite__dso_data_reopen, &suite__perf_evsel__roundtrip_name_test, +#ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, &suite__syscall_openat_tp_fields, +#endif &suite__attr, &suite__hists_link, &suite__python_use, @@ -71,7 +75,9 @@ static struct test_suite *generic_tests[] = { &suite__thread_maps_share, &suite__hists_output, &suite__hists_cumulate, +#ifdef HAVE_LIBTRACEEVENT &suite__switch_tracking, +#endif &suite__fdarray__filter, &suite__fdarray__add, &suite__kmod_path__parse, diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3440dd2616b0..71a5cb343311 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -20,6 +20,8 @@ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) +#ifdef HAVE_LIBTRACEEVENT + #if defined(__s390x__) /* Return true if kvm module is available and loaded. Test this * and return success when trace point kvm_s390_create_vm @@ -76,6 +78,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist) } return TEST_OK; } +#endif /* HAVE_LIBTRACEEVENT */ static int test__checkevent_raw(struct evlist *evlist) { @@ -222,6 +225,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int test__checkevent_tracepoint_modifier(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -252,6 +256,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist) return test__checkevent_tracepoint_multi(evlist); } +#endif /* HAVE_LIBTRACEEVENT */ static int test__checkevent_raw_modifier(struct evlist *evlist) { @@ -453,6 +458,7 @@ static int test__checkevent_pmu(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int test__checkevent_list(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -491,6 +497,7 @@ static int test__checkevent_list(struct evlist *evlist) return TEST_OK; } +#endif static int test__checkevent_pmu_name(struct evlist *evlist) { @@ -762,6 +769,7 @@ static int test__group2(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int test__group3(struct evlist *evlist __maybe_unused) { struct evsel *evsel, *leader; @@ -853,6 +861,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) return TEST_OK; } +#endif static int test__group4(struct evlist *evlist __maybe_unused) { @@ -1460,6 +1469,7 @@ static int test__sym_event_dc(struct evlist *evlist) return TEST_OK; } +#ifdef HAVE_LIBTRACEEVENT static int count_tracepoints(void) { struct dirent *events_ent; @@ -1513,6 +1523,7 @@ static int test__all_tracepoints(struct evlist *evlist) return test__checkevent_tracepoint_multi(evlist); } +#endif /* HAVE_LIBTRACEVENT */ static int test__hybrid_hw_event_with_pmu(struct evlist *evlist) { @@ -1642,6 +1653,7 @@ struct evlist_test { }; static const struct evlist_test test__events[] = { +#ifdef HAVE_LIBTRACEEVENT { .name = "syscalls:sys_enter_openat", .check = test__checkevent_tracepoint, @@ -1652,6 +1664,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_tracepoint_multi, /* 1 */ }, +#endif { .name = "r1a", .check = test__checkevent_raw, @@ -1702,6 +1715,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_breakpoint_w, /* 1 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "syscalls:sys_enter_openat:k", .check = test__checkevent_tracepoint_modifier, @@ -1712,6 +1726,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_tracepoint_multi_modifier, /* 3 */ }, +#endif { .name = "r1a:kp", .check = test__checkevent_raw_modifier, @@ -1757,11 +1772,13 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_breakpoint_w_modifier, /* 2 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "r1,syscalls:sys_enter_openat:k,1:1:hp", .check = test__checkevent_list, /* 3 */ }, +#endif { .name = "instructions:G", .check = test__checkevent_exclude_host_modifier, @@ -1792,11 +1809,13 @@ static const struct evlist_test test__events[] = { .check = test__group2, /* 9 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", .check = test__group3, /* 0 */ }, +#endif { .name = "{cycles:u,instructions:kp}:p", .check = test__group4, @@ -1807,11 +1826,13 @@ static const struct evlist_test test__events[] = { .check = test__group5, /* 2 */ }, +#ifdef HAVE_LIBTRACEEVENT { .name = "*:*", .check = test__all_tracepoints, /* 3 */ }, +#endif { .name = "{cycles,cache-misses:G}:H", .check = test__group_gh1, @@ -1867,7 +1888,7 @@ static const struct evlist_test test__events[] = { .check = test__checkevent_breakpoint_len_rw_modifier, /* 4 */ }, -#if defined(__s390x__) +#if defined(__s390x__) && defined(HAVE_LIBTRACEEVENT) { .name = "kvm-s390:kvm_s390_create_vm", .check = test__checkevent_tracepoint, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d04802bfa23f..8345464d0130 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -69,7 +69,6 @@ perf-y += namespaces.o perf-y += comm.o perf-y += thread.o perf-y += thread_map.o -perf-y += trace-event-parse.o perf-y += parse-events-flex.o perf-y += parse-events-bison.o perf-y += pmu.o @@ -77,11 +76,12 @@ perf-y += pmus.o perf-y += pmu-flex.o perf-y += pmu-bison.o perf-y += pmu-hybrid.o -perf-y += trace-event-read.o -perf-y += trace-event-info.o -perf-y += trace-event-scripting.o -perf-y += trace-event.o perf-y += svghelper.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-scripting.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o +perf-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o perf-y += sort.o perf-y += hist.o perf-y += util.o @@ -153,8 +153,12 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o + +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o +endif + perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o @@ -189,7 +193,10 @@ perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o -perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +endif + perf-y += data-convert-json.o perf-y += scripting-engines/ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index c65cdaf6975e..8031b586e813 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "asm/bug.h" #include "data-convert.h" #include "session.h" @@ -36,6 +35,10 @@ #include "clockid.h" #include "util/sample.h" +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 57db59068cb6..ba9d93ce9463 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -27,6 +27,10 @@ #include "util/thread.h" #include "util/tool.h" +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + struct convert_json { struct perf_tool tool; FILE *out; @@ -217,6 +221,7 @@ static int process_sample_event(struct perf_tool *tool, } output_json_format(out, false, 3, "]"); +#ifdef HAVE_LIBTRACEEVENT if (sample->raw_data) { int i; struct tep_format_field **fields; @@ -236,7 +241,7 @@ static int process_sample_event(struct perf_tool *tool, free(fields); } } - +#endif output_json_format(out, false, 2, "}"); return 0; } @@ -313,7 +318,9 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, +#ifdef HAVE_LIBTRACEEVENT .tracing_data = perf_event__process_tracing_data, +#endif .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index fbf3192bced9..590d4e77effc 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -290,6 +290,7 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) return evsel; } +#ifdef HAVE_LIBTRACEEVENT struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) { struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0); @@ -305,7 +306,8 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) evlist__add(evlist, evsel); return evsel; -}; +} +#endif int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { @@ -376,6 +378,7 @@ struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char return NULL; } +#ifdef HAVE_LIBTRACEEVENT int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { struct evsel *evsel = evsel__newtp(sys, name); @@ -387,6 +390,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, evlist__add(evlist, evsel); return 0; } +#endif struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 16734c6756b3..e5b84ead566c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -127,7 +127,9 @@ static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) { return evlist__add_aux_dummy(evlist, true); } +#ifdef HAVE_LIBTRACEEVENT struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide); +#endif int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, evsel__sb_cb_t cb, void *data); @@ -135,7 +137,9 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data); int evlist__start_sb_thread(struct evlist *evlist, struct target *target); void evlist__stop_sb_thread(struct evlist *evlist); +#ifdef HAVE_LIBTRACEEVENT int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); +#endif int __evlist__set_tracepoints_handlers(struct evlist *evlist, const struct evsel_str_handler *assocs, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0f617359a82f..ca911856c4b1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -57,6 +56,10 @@ #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + struct perf_missing_features perf_missing_features; static clockid_t clockid; @@ -439,7 +442,9 @@ struct evsel *evsel__clone(struct evsel *orig) goto out_err; } evsel->cgrp = cgroup__get(orig->cgrp); +#ifdef HAVE_LIBTRACEEVENT evsel->tp_format = orig->tp_format; +#endif evsel->handler = orig->handler; evsel->core.leader = orig->core.leader; @@ -479,6 +484,7 @@ out_err: /* * Returns pointer with encoded error via interface. */ +#ifdef HAVE_LIBTRACEEVENT struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx) { struct evsel *evsel = zalloc(perf_evsel__object.size); @@ -516,6 +522,7 @@ out_free: out_err: return ERR_PTR(err); } +#endif const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = { "cycles", @@ -2758,6 +2765,7 @@ u16 evsel__id_hdr_size(struct evsel *evsel) return size; } +#ifdef HAVE_LIBTRACEEVENT struct tep_format_field *evsel__field(struct evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); @@ -2831,6 +2839,7 @@ u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *n return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } +#endif bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f3485799ddf9..d572be41b960 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -72,7 +72,9 @@ struct evsel { char *name; char *group_name; const char *pmu_name; +#ifdef HAVE_LIBTRACEEVENT struct tep_event *tp_format; +#endif char *filter; unsigned long max_events; double scale; @@ -223,11 +225,14 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) } struct evsel *evsel__clone(struct evsel *orig); -struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); int copy_config_terms(struct list_head *dst, struct list_head *src); void free_config_terms(struct list_head *config_terms); + +#ifdef HAVE_LIBTRACEEVENT +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); + /* * Returns pointer with encoded error via interface. */ @@ -235,10 +240,13 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name) { return evsel__newtp_idx(sys, name, 0); } +#endif struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config); +#ifdef HAVE_LIBTRACEEVENT struct tep_event *event_format__new(const char *sys, const char *name); +#endif void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); void evsel__exit(struct evsel *evsel); @@ -323,6 +331,7 @@ bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; +#ifdef HAVE_LIBTRACEEVENT void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name); u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name); @@ -330,6 +339,7 @@ static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sampl { return evsel__rawptr(evsel, sample, name); } +#endif struct tep_format_field; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 8c2ea8001329..bd22c4932d10 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -2,7 +2,6 @@ #include #include #include -#include #include "evsel.h" #include "util/evsel_fprintf.h" #include "util/event.h" @@ -13,6 +12,10 @@ #include "srcline.h" #include "dso.h" +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) { va_list args; @@ -74,6 +77,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE term, (u64)evsel->core.attr.sample_freq); } +#ifdef HAVE_LIBTRACEEVENT if (details->trace_fields) { struct tep_format_field *field; @@ -96,6 +100,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE field = field->next; } } +#endif out: fputc('\n', fp); return ++printed; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dc2ae397d400..404d816ca124 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness @@ -298,6 +303,7 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) return 0; } +#ifdef HAVE_LIBTRACEEVENT static int write_tracing_data(struct feat_fd *ff, struct evlist *evlist) { @@ -306,6 +312,7 @@ static int write_tracing_data(struct feat_fd *ff, return read_tracing_data(ff->fd, &evlist->core.entries); } +#endif static int write_build_id(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -2394,12 +2401,14 @@ FEAT_PROCESS_STR_FUN(arch, arch); FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc); FEAT_PROCESS_STR_FUN(cpuid, cpuid); +#ifdef HAVE_LIBTRACEEVENT static int process_tracing_data(struct feat_fd *ff, void *data) { ssize_t ret = trace_report(ff->fd, data, false); return ret < 0 ? -1 : 0; } +#endif static int process_build_id(struct feat_fd *ff, void *data __maybe_unused) { @@ -3366,7 +3375,9 @@ err: const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { +#ifdef HAVE_LIBTRACEEVENT FEAT_OPN(TRACING_DATA, tracing_data, false), +#endif FEAT_OPN(BUILD_ID, build_id, false), FEAT_OPR(HOSTNAME, hostname, false), FEAT_OPR(OSRELEASE, osrelease, false), @@ -4082,6 +4093,7 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } +#ifdef HAVE_LIBTRACEEVENT static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent) { struct tep_event *event; @@ -4125,6 +4137,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h return 0; } +#endif int perf_session__read_header(struct perf_session *session, int repipe_fd) { @@ -4230,11 +4243,15 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd) lseek(fd, tmp, SEEK_SET); } +#ifdef HAVE_LIBTRACEEVENT perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent)) goto out_delete_evlist; +#else + perf_header__process_sections(header, fd, NULL, perf_file_section__process); +#endif return 0; out_errno: @@ -4412,6 +4429,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, return 0; } +#ifdef HAVE_LIBTRACEEVENT int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event) { @@ -4459,6 +4477,7 @@ int perf_event__process_tracing_data(struct perf_session *session, return size_read + padding; } +#endif int perf_event__process_build_id(struct perf_session *session, union perf_event *event) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2d5e601ba60f..e3861ae62172 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -160,8 +160,10 @@ int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); +#ifdef HAVE_LIBTRACEEVENT int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event); +#endif int perf_event__process_build_id(struct perf_session *session, union perf_event *event); bool is_perf_magic(u64 magic); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index e3548ddef254..6d3921627e33 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3142,6 +3142,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, return 1; } +#ifdef HAVE_LIBTRACEEVENT static int intel_pt_process_switch(struct intel_pt *pt, struct perf_sample *sample) { @@ -3165,6 +3166,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, -1, tid); } +#endif /* HAVE_LIBTRACEEVENT */ static int intel_pt_context_switch_in(struct intel_pt *pt, struct perf_sample *sample) @@ -3433,9 +3435,12 @@ static int intel_pt_process_event(struct perf_session *session, return err; } +#ifdef HAVE_LIBTRACEEVENT if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) err = intel_pt_process_switch(pt, sample); - else if (event->header.type == PERF_RECORD_ITRACE_START) + else +#endif + if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) err = intel_pt_process_aux_output_hw_id(pt, event, sample); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6502cd679f57..21cce83462b3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -445,6 +445,7 @@ out_free_terms: return ret; } +#ifdef HAVE_LIBTRACEEVENT static void tracepoint_error(struct parse_events_error *e, int err, const char *sys, const char *name) { @@ -593,6 +594,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, closedir(events_dir); return ret; } +#endif /* HAVE_LIBTRACEEVENT */ #ifdef HAVE_LIBBPF_SUPPORT struct __add_bpf_event_param { @@ -1143,6 +1145,7 @@ static int config_term_pmu(struct perf_event_attr *attr, return config_term_common(attr, term, err); } +#ifdef HAVE_LIBTRACEEVENT static int config_term_tracepoint(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) @@ -1170,6 +1173,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, return 0; } +#endif static int config_attr(struct perf_event_attr *attr, struct list_head *head, @@ -1325,6 +1329,7 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, struct parse_events_error *err, struct list_head *head_config) { +#ifdef HAVE_LIBTRACEEVENT if (head_config) { struct perf_event_attr attr; @@ -1339,6 +1344,16 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, else return add_tracepoint_event(list, idx, sys, event, err, head_config); +#else + (void)list; + (void)idx; + (void)sys; + (void)event; + (void)head_config; + parse_events_error__handle(err, 0, strdup("unsupported tracepoint"), + strdup("libtraceevent is necessary for tracepoint support")); + return -1; +#endif } int parse_events_add_numeric(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 07df7bb7b042..428e72eaafcc 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -18,7 +18,6 @@ struct parse_events_error; struct option; struct perf_pmu; -bool have_tracepoints(struct list_head *evlist); bool is_event_supported(u8 type, u64 config); const char *event_type(int type); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b5941c74a0d6..6fb84b7455b8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -5,7 +5,9 @@ #include #include #include +#ifdef HAVE_LIBTRACEEVENT #include +#endif #include #include "evlist.h" #include "callchain.h" @@ -417,6 +419,7 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) return ret; } +#ifdef HAVE_LIBTRACEEVENT static bool is_tracepoint(struct pyrf_event *pevent) { return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT; @@ -486,14 +489,17 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) return tracepoint_field(pevent, field); } +#endif /* HAVE_LIBTRACEEVENT */ static PyObject* pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name) { PyObject *obj = NULL; +#ifdef HAVE_LIBTRACEEVENT if (is_tracepoint(pevent)) obj = get_tracepoint_field(pevent, attr_name); +#endif return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name); } @@ -1326,6 +1332,9 @@ static struct { static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { +#ifndef HAVE_LIBTRACEEVENT + return NULL; +#else struct tep_event *tp_format; static char *kwlist[] = { "sys", "name", NULL }; char *sys = NULL; @@ -1340,6 +1349,7 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, return _PyLong_FromLong(-1); return _PyLong_FromLong(tp_format->id); +#endif // HAVE_LIBTRACEEVENT } static PyMethodDef perf__methods[] = { diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 0f5ba28339cf..d47820c0b4d4 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,5 +1,7 @@ -perf-$(CONFIG_LIBPERL) += trace-event-perl.o -perf-$(CONFIG_LIBPYTHON) += trace-event-python.o +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_LIBPERL) += trace-event-perl.o + perf-$(CONFIG_LIBPYTHON) += trace-event-python.o +endif CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 5b602b6d4685..0bacb49408f8 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* perl needs the following define, right after including stdbool.h */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d685a7399ee2..fabba21919b8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "../build-id.h" #include "../counts.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1facd4616317..7c021c6cedb9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -314,7 +314,9 @@ void perf_session__delete(struct perf_session *session) evlist__delete(session->evlist); perf_data__close(session->data); } +#ifdef HAVE_LIBTRACEEVENT trace_event__cleanup(&session->tevent); +#endif free(session); } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index be5871ea558f..ee3715e8563b 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -33,7 +33,9 @@ struct perf_session { struct auxtrace *auxtrace; struct itrace_synth_opts *itrace_synth_opts; struct list_head auxtrace_index; +#ifdef HAVE_LIBTRACEEVENT struct trace_event tevent; +#endif struct perf_record_time_conv time_conv; bool repipe; bool one_mmap; diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 43e7ca40b2ec..e80ffbbfacfb 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -63,12 +63,18 @@ libperf = getenv('LIBPERF') ext_sources = [f.strip() for f in open('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +extra_libraries = [] + +if '-DHAVE_LIBTRACEEVENT' in cflags: + extra_libraries += [ 'traceevent' ] +else: + ext_sources.remove('util/trace-event.c') + # use full paths with source files ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) -extra_libraries = [] if '-DHAVE_LIBNUMA_SUPPORT' in cflags: - extra_libraries = [ 'numa' ] + extra_libraries += [ 'numa' ] if '-DHAVE_LIBCAP_SUPPORT' in cflags: extra_libraries += [ 'cap' ] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2e7330867e2e..c7a97b33e134 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -22,7 +22,6 @@ #include "srcline.h" #include "strlist.h" #include "strbuf.h" -#include #include "mem-events.h" #include "annotate.h" #include "event.h" @@ -32,6 +31,10 @@ #include #include +#ifdef HAVE_LIBTRACEEVENT +#include +#endif + regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; @@ -743,6 +746,7 @@ struct sort_entry sort_time = { /* --sort trace */ +#ifdef HAVE_LIBTRACEEVENT static char *get_trace_output(struct hist_entry *he) { struct trace_seq seq; @@ -806,6 +810,7 @@ struct sort_entry sort_trace = { .se_snprintf = hist_entry__trace_snprintf, .se_width_idx = HISTC_TRACE, }; +#endif /* HAVE_LIBTRACEEVENT */ /* sort keys for branch stacks */ @@ -2022,7 +2027,9 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), +#ifdef HAVE_LIBTRACEEVENT DIM(SORT_TRACE, "trace", sort_trace), +#endif DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), DIM(SORT_CGROUP, "cgroup", sort_cgroup), @@ -2206,7 +2213,14 @@ bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \ return hse->se == &sort_ ## key ; \ } +#ifdef HAVE_LIBTRACEEVENT MK_SORT_ENTRY_CHK(trace) +#else +bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt __maybe_unused) +{ + return false; +} +#endif MK_SORT_ENTRY_CHK(srcline) MK_SORT_ENTRY_CHK(srcfile) MK_SORT_ENTRY_CHK(thread) @@ -2347,6 +2361,17 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, return 0; } +#ifndef HAVE_LIBTRACEEVENT +bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused) +{ + return false; +} +bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused, + struct hists *hists __maybe_unused) +{ + return false; +} +#else struct hpp_dynamic_entry { struct perf_hpp_fmt hpp; struct evsel *evsel; @@ -2621,6 +2646,7 @@ __alloc_dynamic_entry(struct evsel *evsel, struct tep_format_field *field, return hde; } +#endif /* HAVE_LIBTRACEEVENT */ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) { @@ -2633,6 +2659,7 @@ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) new_hse = memdup(hse, sizeof(*hse)); if (new_hse) new_fmt = &new_hse->hpp; +#ifdef HAVE_LIBTRACEEVENT } else if (perf_hpp__is_dynamic_entry(fmt)) { struct hpp_dynamic_entry *hde, *new_hde; @@ -2640,6 +2667,7 @@ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) new_hde = memdup(hde, sizeof(*hde)); if (new_hde) new_fmt = &new_hde->hpp; +#endif } else { new_fmt = memdup(fmt, sizeof(*fmt)); } @@ -2719,6 +2747,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) return evsel; } +#ifdef HAVE_LIBTRACEEVENT static int __dynamic_dimension__add(struct evsel *evsel, struct tep_format_field *field, bool raw_trace, int level) @@ -2789,13 +2818,13 @@ static int add_all_matching_fields(struct evlist *evlist, } return ret; } +#endif /* HAVE_LIBTRACEEVENT */ static int add_dynamic_entry(struct evlist *evlist, const char *tok, int level) { char *str, *event_name, *field_name, *opt_name; struct evsel *evsel; - struct tep_format_field *field; bool raw_trace = symbol_conf.raw_trace; int ret = 0; @@ -2820,6 +2849,7 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, raw_trace = true; } +#ifdef HAVE_LIBTRACEEVENT if (!strcmp(field_name, "trace_fields")) { ret = add_all_dynamic_fields(evlist, raw_trace, level); goto out; @@ -2829,6 +2859,7 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } +#endif evsel = find_evsel(evlist, event_name); if (evsel == NULL) { @@ -2843,10 +2874,12 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, goto out; } +#ifdef HAVE_LIBTRACEEVENT if (!strcmp(field_name, "*")) { ret = add_evsel_fields(evsel, raw_trace, level); } else { - field = tep_find_any_field(evsel->tp_format, field_name); + struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name); + if (field == NULL) { pr_debug("Cannot find event field for %s.%s\n", event_name, field_name); @@ -2855,6 +2888,10 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, ret = __dynamic_dimension__add(evsel, field, raw_trace, level); } +#else + (void)level; + (void)raw_trace; +#endif /* HAVE_LIBTRACEEVENT */ out: free(str); @@ -2955,11 +2992,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { - if (!strcmp(dynamic_headers[j], sd->name)) + if (sd->name && !strcmp(dynamic_headers[j], sd->name)) sort_dimension_add_dynamic_header(sd); } @@ -3009,7 +3046,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__BRANCH) @@ -3025,7 +3062,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { struct sort_dimension *sd = &memory_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__MEMORY) @@ -3339,7 +3376,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; return __sort_dimension__add_output(list, sd); @@ -3357,7 +3394,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__BRANCH) @@ -3369,7 +3406,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { struct sort_dimension *sd = &memory_sort_dimensions[i]; - if (strncasecmp(tok, sd->name, strlen(tok))) + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; if (sort__mode != SORT_MODE__MEMORY) @@ -3508,6 +3545,9 @@ void reset_output_field(void) static void add_key(struct strbuf *sb, const char *str, int *llen) { + if (!str) + return; + if (*llen >= 75) { strbuf_addstr(sb, "\n\t\t\t "); *llen = INDENT; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 0645795ff080..3ab6a92b1a6d 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2157,6 +2157,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr * return err; } +#ifdef HAVE_LIBTRACEEVENT int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process) { @@ -2203,6 +2204,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e return aligned_size; } +#endif int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine) @@ -2355,6 +2357,7 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, } ret += err; +#ifdef HAVE_LIBTRACEEVENT if (have_tracepoints(&evlist->core.entries)) { int fd = perf_data__fd(data); @@ -2374,6 +2377,9 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, } ret += err; } +#else + (void)data; +#endif return ret; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c9c83a40647c..2d3c2576bab7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -11,6 +11,8 @@ #include "trace-event.h" #include +#include +#include static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 43146a4ce2fb..1162c49b8082 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 636a010d929b..56175c53f9af 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "debug.h" #include "trace-event.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index b3ee651e3d91..8ad75b31e09b 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 - #include #include #include diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 8f39f5bcb2c2..add6c5d9531c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -2,9 +2,11 @@ #ifndef _PERF_UTIL_TRACE_EVENT_H #define _PERF_UTIL_TRACE_EVENT_H -#include -#include "parse-events.h" +#include +#include +#include +struct evlist; struct machine; struct perf_sample; union perf_event; @@ -18,6 +20,11 @@ struct trace_event { struct tep_plugin_list *plugin_list; }; +typedef char *(tep_func_resolver_t)(void *priv, + unsigned long long *addrp, char **modp); + +bool have_tracepoints(struct list_head *evlist); + int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); int trace_event__register_resolver(struct machine *machine, -- cgit v1.2.3 From 4171925aa9f3f7bf57b100238f148b50c45c3b1b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 29 Nov 2022 22:29:35 -0800 Subject: tools lib traceevent: Remove libtraceevent libtraceevent is now out-of-date and it is better to depend on the system version. Remove this code that is no longer depended upon by any builds. Committer notes: Removed the removed tools/lib/traceevent/ from tools/perf/MANIFEST, so that 'make perf-tar-src-pkg' works. Signed-off-by: Ian Rogers Acked-by: Steven Rostedt (VMware) Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20221130062935.2219247-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/.gitignore | 4 - tools/lib/traceevent/Build | 8 - tools/lib/traceevent/Documentation/Makefile | 207 - tools/lib/traceevent/Documentation/asciidoc.conf | 120 - .../Documentation/libtraceevent-commands.txt | 153 - .../Documentation/libtraceevent-cpus.txt | 77 - .../Documentation/libtraceevent-endian_read.txt | 78 - .../Documentation/libtraceevent-event_find.txt | 103 - .../Documentation/libtraceevent-event_get.txt | 99 - .../Documentation/libtraceevent-event_list.txt | 122 - .../Documentation/libtraceevent-event_print.txt | 130 - .../Documentation/libtraceevent-field_find.txt | 118 - .../Documentation/libtraceevent-field_get_val.txt | 122 - .../Documentation/libtraceevent-field_print.txt | 126 - .../Documentation/libtraceevent-field_read.txt | 81 - .../Documentation/libtraceevent-fields.txt | 105 - .../Documentation/libtraceevent-file_endian.txt | 91 - .../Documentation/libtraceevent-filter.txt | 209 - .../Documentation/libtraceevent-func_apis.txt | 183 - .../Documentation/libtraceevent-func_find.txt | 88 - .../Documentation/libtraceevent-handle.txt | 101 - .../Documentation/libtraceevent-header_page.txt | 102 - .../Documentation/libtraceevent-host_endian.txt | 104 - .../Documentation/libtraceevent-long_size.txt | 78 - .../Documentation/libtraceevent-page_size.txt | 82 - .../Documentation/libtraceevent-parse_event.txt | 90 - .../Documentation/libtraceevent-parse_head.txt | 82 - .../Documentation/libtraceevent-plugins.txt | 122 - .../Documentation/libtraceevent-record_parse.txt | 137 - .../libtraceevent-reg_event_handler.txt | 156 - .../Documentation/libtraceevent-reg_print_func.txt | 155 - .../Documentation/libtraceevent-set_flag.txt | 104 - .../Documentation/libtraceevent-strerror.txt | 85 - .../Documentation/libtraceevent-tseq.txt | 158 - .../lib/traceevent/Documentation/libtraceevent.txt | 192 - .../lib/traceevent/Documentation/manpage-1.72.xsl | 14 - .../lib/traceevent/Documentation/manpage-base.xsl | 35 - .../Documentation/manpage-bold-literal.xsl | 17 - .../traceevent/Documentation/manpage-normal.xsl | 13 - .../Documentation/manpage-suppress-sp.xsl | 21 - tools/lib/traceevent/Makefile | 300 - tools/lib/traceevent/event-parse-api.c | 333 - tools/lib/traceevent/event-parse-local.h | 123 - tools/lib/traceevent/event-parse.c | 7624 -------------------- tools/lib/traceevent/event-parse.h | 750 -- tools/lib/traceevent/event-plugin.c | 711 -- tools/lib/traceevent/event-utils.h | 67 - tools/lib/traceevent/kbuffer-parse.c | 809 --- tools/lib/traceevent/kbuffer.h | 68 - tools/lib/traceevent/libtraceevent.pc.template | 10 - tools/lib/traceevent/parse-filter.c | 2281 ------ tools/lib/traceevent/parse-utils.c | 71 - tools/lib/traceevent/plugins/Build | 12 - tools/lib/traceevent/plugins/Makefile | 225 - tools/lib/traceevent/plugins/plugin_cfg80211.c | 43 - tools/lib/traceevent/plugins/plugin_function.c | 282 - tools/lib/traceevent/plugins/plugin_futex.c | 123 - tools/lib/traceevent/plugins/plugin_hrtimer.c | 74 - tools/lib/traceevent/plugins/plugin_jbd2.c | 61 - tools/lib/traceevent/plugins/plugin_kmem.c | 80 - tools/lib/traceevent/plugins/plugin_kvm.c | 527 -- tools/lib/traceevent/plugins/plugin_mac80211.c | 88 - tools/lib/traceevent/plugins/plugin_sched_switch.c | 146 - tools/lib/traceevent/plugins/plugin_scsi.c | 434 -- tools/lib/traceevent/plugins/plugin_tlb.c | 66 - tools/lib/traceevent/plugins/plugin_xen.c | 138 - tools/lib/traceevent/tep_strerror.c | 53 - tools/lib/traceevent/trace-seq.c | 249 - tools/lib/traceevent/trace-seq.h | 55 - tools/perf/MANIFEST | 1 - 70 files changed, 19876 deletions(-) delete mode 100644 tools/lib/traceevent/.gitignore delete mode 100644 tools/lib/traceevent/Build delete mode 100644 tools/lib/traceevent/Documentation/Makefile delete mode 100644 tools/lib/traceevent/Documentation/asciidoc.conf delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-commands.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-cpus.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_find.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_get.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_list.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_print.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_find.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_print.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_read.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-fields.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-filter.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-func_find.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-handle.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-header_page.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-long_size.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-page_size.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-plugins.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-strerror.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-tseq.txt delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent.txt delete mode 100644 tools/lib/traceevent/Documentation/manpage-1.72.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-base.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-bold-literal.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-normal.xsl delete mode 100644 tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl delete mode 100644 tools/lib/traceevent/Makefile delete mode 100644 tools/lib/traceevent/event-parse-api.c delete mode 100644 tools/lib/traceevent/event-parse-local.h delete mode 100644 tools/lib/traceevent/event-parse.c delete mode 100644 tools/lib/traceevent/event-parse.h delete mode 100644 tools/lib/traceevent/event-plugin.c delete mode 100644 tools/lib/traceevent/event-utils.h delete mode 100644 tools/lib/traceevent/kbuffer-parse.c delete mode 100644 tools/lib/traceevent/kbuffer.h delete mode 100644 tools/lib/traceevent/libtraceevent.pc.template delete mode 100644 tools/lib/traceevent/parse-filter.c delete mode 100644 tools/lib/traceevent/parse-utils.c delete mode 100644 tools/lib/traceevent/plugins/Build delete mode 100644 tools/lib/traceevent/plugins/Makefile delete mode 100644 tools/lib/traceevent/plugins/plugin_cfg80211.c delete mode 100644 tools/lib/traceevent/plugins/plugin_function.c delete mode 100644 tools/lib/traceevent/plugins/plugin_futex.c delete mode 100644 tools/lib/traceevent/plugins/plugin_hrtimer.c delete mode 100644 tools/lib/traceevent/plugins/plugin_jbd2.c delete mode 100644 tools/lib/traceevent/plugins/plugin_kmem.c delete mode 100644 tools/lib/traceevent/plugins/plugin_kvm.c delete mode 100644 tools/lib/traceevent/plugins/plugin_mac80211.c delete mode 100644 tools/lib/traceevent/plugins/plugin_sched_switch.c delete mode 100644 tools/lib/traceevent/plugins/plugin_scsi.c delete mode 100644 tools/lib/traceevent/plugins/plugin_tlb.c delete mode 100644 tools/lib/traceevent/plugins/plugin_xen.c delete mode 100644 tools/lib/traceevent/tep_strerror.c delete mode 100644 tools/lib/traceevent/trace-seq.c delete mode 100644 tools/lib/traceevent/trace-seq.h diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore deleted file mode 100644 index 7123c70b9ebc..000000000000 --- a/tools/lib/traceevent/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -TRACEEVENT-CFLAGS -libtraceevent-dynamic-list -libtraceevent.so.* diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build deleted file mode 100644 index f9a5d79578f5..000000000000 --- a/tools/lib/traceevent/Build +++ /dev/null @@ -1,8 +0,0 @@ -libtraceevent-y += event-parse.o -libtraceevent-y += event-plugin.o -libtraceevent-y += trace-seq.o -libtraceevent-y += parse-filter.o -libtraceevent-y += parse-utils.o -libtraceevent-y += kbuffer-parse.o -libtraceevent-y += tep_strerror.o -libtraceevent-y += event-parse-api.o diff --git a/tools/lib/traceevent/Documentation/Makefile b/tools/lib/traceevent/Documentation/Makefile deleted file mode 100644 index aa72ab96c3c1..000000000000 --- a/tools/lib/traceevent/Documentation/Makefile +++ /dev/null @@ -1,207 +0,0 @@ -include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak - -# This Makefile and manpage XSL files were taken from tools/perf/Documentation -# and modified for libtraceevent. - -MAN3_TXT= \ - $(wildcard libtraceevent-*.txt) \ - libtraceevent.txt - -MAN_TXT = $(MAN3_TXT) -_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) -_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) -_DOC_MAN3=$(patsubst %.txt,%.3,$(MAN3_TXT)) - -MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) -MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) -DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3)) - -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix?=$(HOME) -endif -bindir?=$(prefix)/bin -htmldir?=$(prefix)/share/doc/libtraceevent-doc -pdfdir?=$(prefix)/share/doc/libtraceevent-doc -mandir?=$(prefix)/share/man -man3dir=$(mandir)/man3 - -ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf -ASCIIDOC_HTML = xhtml11 -MANPAGE_XSL = manpage-normal.xsl -XMLTO_EXTRA = -INSTALL?=install -RM ?= rm -f - -ifdef USE_ASCIIDOCTOR -ASCIIDOC = asciidoctor -ASCIIDOC_EXTRA = -a compat-mode -ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions -ASCIIDOC_EXTRA += -a mansource="libtraceevent" -a manmanual="libtraceevent Manual" -ASCIIDOC_HTML = xhtml5 -endif - -XMLTO=xmlto - -_tmp_tool_path := $(call get-executable,$(ASCIIDOC)) -ifeq ($(_tmp_tool_path),) - missing_tools = $(ASCIIDOC) -endif - -ifndef USE_ASCIIDOCTOR -_tmp_tool_path := $(call get-executable,$(XMLTO)) -ifeq ($(_tmp_tool_path),) - missing_tools += $(XMLTO) -endif -endif - -# -# For asciidoc ... -# -7.1.2, no extra settings are needed. -# 8.0-, set ASCIIDOC8. -# - -# -# For docbook-xsl ... -# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) -# 1.69.0, no extra settings are needed? -# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? -# 1.71.1, no extra settings are needed? -# 1.72.0, set DOCBOOK_XSL_172. -# 1.73.0-, set ASCIIDOC_NO_ROFF -# - -# -# If you had been using DOCBOOK_XSL_172 in an attempt to get rid -# of 'the ".ft C" problem' in your generated manpages, and you -# instead ended up with weird characters around callouts, try -# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). -# - -ifdef ASCIIDOC8 -ASCIIDOC_EXTRA += -a asciidoc7compatible -endif -ifdef DOCBOOK_XSL_172 -ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff -MANPAGE_XSL = manpage-1.72.xsl -else - ifdef ASCIIDOC_NO_ROFF - # docbook-xsl after 1.72 needs the regular XSL, but will not - # pass-thru raw roff codes from asciidoc.conf, so turn them off. - ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff - endif -endif -ifdef MAN_BOLD_LITERAL -XMLTO_EXTRA += -m manpage-bold-literal.xsl -endif -ifdef DOCBOOK_SUPPRESS_SP -XMLTO_EXTRA += -m manpage-suppress-sp.xsl -endif - -SHELL_PATH ?= $(SHELL) -# Shell quote; -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) - -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -export DESTDIR DESTDIR_SQ - -# -# Please note that there is a minor bug in asciidoc. -# The version after 6.0.3 _will_ include the patch found here: -# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2 -# -# Until that version is released you may have to apply the patch -# yourself - yes, all 6 characters of it! -# -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir -QUIET_SUBDIR1 = - -ifneq ($(findstring $(MAKEFLAGS),w),w) -PRINT_DIR = --no-print-directory -else # "make -w" -NO_SUBDIR = : -endif - -ifneq ($(findstring $(MAKEFLAGS),s),s) -ifneq ($(V),1) - QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; - QUIET_XMLTO = @echo ' XMLTO '$@; - QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ - echo ' SUBDIR ' $$subdir; \ - $(MAKE) $(PRINT_DIR) -C $$subdir - export V -endif -endif - -all: html man - -man: man3 -man3: $(DOC_MAN3) - -html: $(MAN_HTML) - -$(MAN_HTML) $(DOC_MAN3): asciidoc.conf - -install: install-man - -check-man-tools: -ifdef missing_tools - $(error "You need to install $(missing_tools) for man pages") -endif - -do-install-man: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \ - $(INSTALL) -m 644 $(DOC_MAN3) $(DESTDIR)$(man3dir); - -install-man: check-man-tools man do-install-man - -uninstall: uninstall-man - -uninstall-man: - $(call QUIET_UNINST, Documentation-man) \ - $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3)) - - -ifdef missing_tools - DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) -else - DO_INSTALL_MAN = do-install-man -endif - -CLEAN_FILES = \ - $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ - $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ - $(DOC_MAN3) *.3 - -clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) - -ifdef USE_ASCIIDOCTOR -$(OUTPUT)%.3 : $(OUTPUT)%.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b manpage -d manpage \ - $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ -endif - -$(OUTPUT)%.3 : $(OUTPUT)%.xml - $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< - -$(OUTPUT)%.xml : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b docbook -d manpage \ - $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ - -$(MAN_HTML): $(OUTPUT)%.html : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ - $(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ diff --git a/tools/lib/traceevent/Documentation/asciidoc.conf b/tools/lib/traceevent/Documentation/asciidoc.conf deleted file mode 100644 index 07595717f06e..000000000000 --- a/tools/lib/traceevent/Documentation/asciidoc.conf +++ /dev/null @@ -1,120 +0,0 @@ -## linktep: macro -# -# Usage: linktep:command[manpage-section] -# -# Note, {0} is the manpage section, while {target} is the command. -# -# Show TEP link as: (
); if section is defined, else just show -# the command. - -[macros] -(?su)[\\]?(?Plinktep):(?P\S*?)\[(?P.*?)\]= - -[attributes] -asterisk=* -plus=+ -caret=^ -startsb=[ -endsb=] -tilde=~ - -ifdef::backend-docbook[] -[linktep-inlinemacro] -{0%{target}} -{0#} -{0#{target}{0}} -{0#} -endif::backend-docbook[] - -ifdef::backend-docbook[] -ifndef::tep-asciidoc-no-roff[] -# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. -# v1.72 breaks with this because it replaces dots not in roff requests. -[listingblock] -{title} - -ifdef::doctype-manpage[] - .ft C -endif::doctype-manpage[] -| -ifdef::doctype-manpage[] - .ft -endif::doctype-manpage[] - -{title#} -endif::tep-asciidoc-no-roff[] - -ifdef::tep-asciidoc-no-roff[] -ifdef::doctype-manpage[] -# The following two small workarounds insert a simple paragraph after screen -[listingblock] -{title} - -| - -{title#} - -[verseblock] -{title} -{title%} -{title#} -| - -{title#} -{title%} -endif::doctype-manpage[] -endif::tep-asciidoc-no-roff[] -endif::backend-docbook[] - -ifdef::doctype-manpage[] -ifdef::backend-docbook[] -[header] -template::[header-declarations] - - -{mantitle} -{manvolnum} -libtraceevent -{libtraceevent_version} -libtraceevent Manual - - - {manname1} - {manname2} - {manname3} - {manname4} - {manname5} - {manname6} - {manname7} - {manname8} - {manname9} - {manname10} - {manname11} - {manname12} - {manname13} - {manname14} - {manname15} - {manname16} - {manname17} - {manname18} - {manname19} - {manname20} - {manname21} - {manname22} - {manname23} - {manname24} - {manname25} - {manname26} - {manname27} - {manname28} - {manname29} - {manname30} - {manpurpose} - -endif::backend-docbook[] -endif::doctype-manpage[] - -ifdef::backend-xhtml11[] -[linktep-inlinemacro] -{target}{0?({0})} -endif::backend-xhtml11[] diff --git a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt deleted file mode 100644 index bec552001f8e..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt +++ /dev/null @@ -1,153 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_comm, tep_override_comm, tep_pid_is_registered, -tep_data_comm_from_pid, tep_data_pid_from_comm, tep_cmdline_pid - -Manage pid to process name mappings. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); -int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); -bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_); -const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_pevent_, int _pid_); -struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_pevent_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_); -int *tep_cmdline_pid*(struct tep_handle pass:[*]_pevent_, struct cmdline pass:[*]_cmdline_); --- - -DESCRIPTION ------------ -These functions can be used to handle the mapping between pid and process name. -The library builds a cache of these mappings, which is used to display the name -of the process, instead of its pid. This information can be retrieved from -tracefs/saved_cmdlines file. - -The _tep_register_comm()_ function registers a _pid_ / process name mapping. -If a command with the same _pid_ is already registered, an error is returned. -The _pid_ argument is the process ID, the _comm_ argument is the process name, -_tep_ is the event context. The _comm_ is duplicated internally. - -The _tep_override_comm()_ function registers a _pid_ / process name mapping. -If a process with the same pid is already registered, the process name string is -udapted with the new one. The _pid_ argument is the process ID, the _comm_ -argument is the process name, _tep_ is the event context. The _comm_ is -duplicated internally. - -The _tep_is_pid_registered()_ function checks if a pid has a process name -mapping registered. The _pid_ argument is the process ID, _tep_ is the event -context. - -The _tep_data_comm_from_pid()_ function returns the process name for a given -pid. The _pid_ argument is the process ID, _tep_ is the event context. -The returned string should not be freed, but will be freed when the _tep_ -handler is closed. - -The _tep_data_pid_from_comm()_ function returns a pid for a given process name. -The _comm_ argument is the process name, _tep_ is the event context. -The argument _next_ is the cmdline structure to search for the next pid. -As there may be more than one pid for a given process, the result of this call -can be passed back into a recurring call in the _next_ parameter, to search for -the next pid. If _next_ is NULL, it will return the first pid associated with -the _comm_. The function performs a linear search, so it may be slow. - -The _tep_cmdline_pid()_ function returns the pid associated with a given -_cmdline_. The _tep_ argument is the event context. - -RETURN VALUE ------------- -_tep_register_comm()_ function returns 0 on success. In case of an error -1 is -returned and errno is set to indicate the cause of the problem: ENOMEM, if there -is not enough memory to duplicate the _comm_ or EEXIST if a mapping for this -_pid_ is already registered. - -_tep_override_comm()_ function returns 0 on success. In case of an error -1 is -returned and errno is set to indicate the cause of the problem: ENOMEM, if there -is not enough memory to duplicate the _comm_. - -_tep_is_pid_registered()_ function returns true if the _pid_ has a process name -mapped to it, false otherwise. - -_tep_data_comm_from_pid()_ function returns the process name as string, or the -string "<...>" if there is no mapping for the given pid. - -_tep_data_pid_from_comm()_ function returns a pointer to a struct cmdline, that -holds a pid for a given process, or NULL if none is found. This result can be -passed back into a recurring call as the _next_ parameter of the function. - -_tep_cmdline_pid()_ functions returns the pid for the give cmdline. If _cmdline_ - is NULL, then -1 is returned. - -EXAMPLE -------- -The following example registers pid for command "ls", in context of event _tep_ -and performs various searches for pid / process name mappings: -[source,c] --- -#include -... -int ret; -int ls_pid = 1021; -struct tep_handle *tep = tep_alloc(); -... - ret = tep_register_comm(tep, "ls", ls_pid); - if (ret != 0 && errno == EEXIST) - ret = tep_override_comm(tep, "ls", ls_pid); - if (ret != 0) { - /* Failed to register pid / command mapping */ - } -... - if (tep_is_pid_registered(tep, ls_pid) == 0) { - /* Command mapping for ls_pid is not registered */ - } -... - const char *comm = tep_data_comm_from_pid(tep, ls_pid); - if (comm) { - /* Found process name for ls_pid */ - } -... - int pid; - struct cmdline *cmd = tep_data_pid_from_comm(tep, "ls", NULL); - while (cmd) { - pid = tep_cmdline_pid(tep, cmd); - /* Found pid for process "ls" */ - cmd = tep_data_pid_from_comm(tep, "ls", cmd); - } --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt deleted file mode 100644 index 5ad70e43b752..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt +++ /dev/null @@ -1,77 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_cpus, tep_set_cpus - Get / set the number of CPUs, which have a tracing -buffer representing it. Note, the buffer may be empty. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_cpus*(struct tep_handle pass:[*]_tep_); -void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_); --- - -DESCRIPTION ------------ -The _tep_get_cpus()_ function gets the number of CPUs, which have a tracing -buffer representing it. The _tep_ argument is trace event parser context. - -The _tep_set_cpus()_ function sets the number of CPUs, which have a tracing -buffer representing it. The _tep_ argument is trace event parser context. -The _cpu_ argument is the number of CPUs with tracing data. - -RETURN VALUE ------------- -The _tep_get_cpus()_ functions returns the number of CPUs, which have tracing -data recorded. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - tep_set_cpus(tep, 5); -... - printf("We have tracing data for %d CPUs", tep_get_cpus(tep)); --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt deleted file mode 100644 index e64851b6e189..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt +++ /dev/null @@ -1,78 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_read_number - Reads a number from raw data. - -SYNOPSIS --------- -[verse] --- -*#include * - -unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_); --- - -DESCRIPTION ------------ -The _tep_read_number()_ function reads an integer from raw data, taking into -account the endianness of the raw data and the current host. The _tep_ argument -is the trace event parser context. The _ptr_ is a pointer to the raw data, where -the integer is, and the _size_ is the size of the integer. - -RETURN VALUE ------------- -The _tep_read_number()_ function returns the integer in the byte order of -the current host. In case of an error, 0 is returned. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -void process_record(struct tep_record *record) -{ - int offset = 24; - int data = tep_read_number(tep, record->data + offset, 4); - - /* Read the 4 bytes at the offset 24 of data as an integer */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt deleted file mode 100644 index 7bc062c9f76f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt +++ /dev/null @@ -1,103 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_event,tep_find_event_by_name,tep_find_event_by_record - -Find events by given key. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); -struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_); -struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_); --- - -DESCRIPTION ------------ -This set of functions can be used to search for an event, based on a given -criteria. All functions require a pointer to a _tep_, trace event parser -context. - -The _tep_find_event()_ function searches for an event by given event _id_. The -event ID is assigned dynamically and can be viewed in event's format file, -"ID" field. - -The tep_find_event_by_name()_ function searches for an event by given -event _name_, under the system _sys_. If the _sys_ is NULL (not specified), -the first event with _name_ is returned. - -The tep_find_event_by_record()_ function searches for an event from a given -_record_. - -RETURN VALUE ------------- -All these functions return a pointer to the found event, or NULL if there is no -such event. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event; - -event = tep_find_event(tep, 1857); -if (event == NULL) { - /* There is no event with ID 1857 */ -} - -event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -if (event == NULL) { - /* There is no kvm_exit event, from kvm system */ -} - -void event_from_record(struct tep_record *record) -{ - struct tep_event *event = tep_find_event_by_record(tep, record); - if (event == NULL) { - /* There is no event from given record */ - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt deleted file mode 100644 index 6525092fc417..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt +++ /dev/null @@ -1,99 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_event, tep_get_first_event, tep_get_events_count - Access events. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_); -struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_); -int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); --- - -DESCRIPTION ------------ -The _tep_get_event()_ function returns a pointer to event at the given _index_. -The _tep_ argument is trace event parser context, the _index_ is the index of -the requested event. - -The _tep_get_first_event()_ function returns a pointer to the first event. -As events are stored in an array, this function returns the pointer to the -beginning of the array. The _tep_ argument is trace event parser context. - -The _tep_get_events_count()_ function returns the number of the events -in the array. The _tep_ argument is trace event parser context. - -RETURN VALUE ------------- -The _tep_get_event()_ returns a pointer to the event located at _index_. -NULL is returned in case of error, in case there are no events or _index_ is -out of range. - -The _tep_get_first_event()_ returns a pointer to the first event. NULL is -returned in case of error, or in case there are no events. - -The _tep_get_events_count()_ returns the number of the events. 0 is -returned in case of error, or in case there are no events. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -int i,count = tep_get_events_count(tep); -struct tep_event *event, *events = tep_get_first_event(tep); - -if (events == NULL) { - /* There are no events */ -} else { - for (i = 0; i < count; i++) { - event = (events+i); - /* process events[i] */ - } - - /* Get the last event */ - event = tep_get_event(tep, count-1); -} --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt deleted file mode 100644 index fba350e5a4cb..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_list_events, tep_list_events_copy - -Get list of events, sorted by given criteria. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_event_sort_type* { - _TEP_EVENT_SORT_ID_, - _TEP_EVENT_SORT_NAME_, - _TEP_EVENT_SORT_SYSTEM_, -}; - -struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); -struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); --- - -DESCRIPTION ------------ -The _tep_list_events()_ function returns an array of pointers to the events, -sorted by the _sort_type_ criteria. The last element of the array is NULL. -The returned memory must not be freed, it is managed by the library. -The function is not thread safe. The _tep_ argument is trace event parser -context. The _sort_type_ argument is the required sort criteria: -[verse] --- - _TEP_EVENT_SORT_ID_ - sort by the event ID. - _TEP_EVENT_SORT_NAME_ - sort by the event (name, system, id) triplet. - _TEP_EVENT_SORT_SYSTEM_ - sort by the event (system, name, id) triplet. --- - -The _tep_list_events_copy()_ is a thread safe version of _tep_list_events()_. -It has the same behavior, but the returned array is allocated internally and -must be freed by the caller. Note that the content of the array must not be -freed (see the EXAMPLE below). - -RETURN VALUE ------------- -The _tep_list_events()_ function returns an array of pointers to events. -In case of an error, NULL is returned. The returned array must not be freed, -it is managed by the library. - -The _tep_list_events_copy()_ function returns an array of pointers to events. -In case of an error, NULL is returned. The returned array must be freed by -the caller. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -int i; -struct tep_event_format **events; - -i=0; -events = tep_list_events(tep, TEP_EVENT_SORT_ID); -if (events == NULL) { - /* Failed to get the events, sorted by ID */ -} else { - while(events[i]) { - /* walk through the list of the events, sorted by ID */ - i++; - } -} - -i=0; -events = tep_list_events_copy(tep, TEP_EVENT_SORT_NAME); -if (events == NULL) { - /* Failed to get the events, sorted by name */ -} else { - while(events[i]) { - /* walk through the list of the events, sorted by name */ - i++; - } - free(events); -} - -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt deleted file mode 100644 index 2c6a61811118..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt +++ /dev/null @@ -1,130 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_print_event - Writes event information into a trace sequence. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._) --- - -DESCRIPTION ------------ - -The _tep_print_event()_ function parses the event information of the given -_record_ and writes it into the trace sequence _s_, according to the format -string _fmt_. The desired information is specified after the format string. -The _fmt_ is printf-like format string, following arguments are supported: -[verse] --- - TEP_PRINT_PID, "%d" - PID of the event. - TEP_PRINT_CPU, "%d" - Event CPU. - TEP_PRINT_COMM, "%s" - Event command string. - TEP_PRINT_NAME, "%s" - Event name. - TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more - fields - interrupt state, scheduling state, - current context, and preemption count. - Field 1 is the interrupt enabled state: - d : Interrupts are disabled - . : Interrupts are enabled - X : The architecture does not support this - information - Field 2 is the "need resched" state. - N : The task is set to call the scheduler when - possible, as another higher priority task - may need to be scheduled in. - . : The task is not set to call the scheduler. - Field 3 is the context state. - . : Normal context - s : Soft interrupt context - h : Hard interrupt context - H : Hard interrupt context which triggered - during soft interrupt context. - z : NMI context - Z : NMI context which triggered during hard - interrupt context - Field 4 is the preemption count. - . : The preempt count is zero. - On preemptible kernels (where the task can be scheduled - out in arbitrary locations while in kernel context), the - preempt count, when non zero, will prevent the kernel - from scheduling out the current task. The preempt count - number is displayed when it is not zero. - Depending on the kernel, it may show other fields - (lock depth, or migration disabled, which are unique to - specialized kernels). - TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be - specified as part of this format string: - "%precision.divisord". Example: - "%3.1000d" - divide the time by 1000 and print the first - 3 digits before the dot. Thus, the time stamp - "123456000" will be printed as "123.456" - TEP_PRINT_INFO, "%s" - event information. - TEP_PRINT_INFO_RAW, "%s" - event information, in raw format. - --- -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct trace_seq seq; -trace_seq_init(&seq); -struct tep_handle *tep = tep_alloc(); -... -void print_my_event(struct tep_record *record) -{ - trace_seq_reset(&seq); - tep_print_event(tep, s, record, "%16s-%-5d [%03d] %s %6.1000d %s %s", - TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU, - TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME, - TEP_PRINT_INFO); -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt deleted file mode 100644 index 0896af5b9eff..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt +++ /dev/null @@ -1,118 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_common_field, tep_find_field, tep_find_any_field - -Search for a field in an event. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); -struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_); -struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); --- - -DESCRIPTION ------------ -These functions search for a field with given name in an event. The field -returned can be used to find the field content from within a data record. - -The _tep_find_common_field()_ function searches for a common field with _name_ -in the _event_. - -The _tep_find_field()_ function searches for an event specific field with -_name_ in the _event_. - -The _tep_find_any_field()_ function searches for any field with _name_ in the -_event_. - -RETURN VALUE ------------- -The _tep_find_common_field(), _tep_find_field()_ and _tep_find_any_field()_ -functions return a pointer to the found field, or NULL in case there is no field -with the requested name. - -EXAMPLE -------- -[source,c] --- -#include -... -void get_htimer_info(struct tep_handle *tep, struct tep_record *record) -{ - struct tep_format_field *field; - struct tep_event *event; - long long softexpires; - int mode; - int pid; - - event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); - - field = tep_find_common_field(event, "common_pid"); - if (field == NULL) { - /* Cannot find "common_pid" field in the event */ - } else { - /* Get pid from the data record */ - pid = tep_read_number(tep, record->data + field->offset, - field->size); - } - - field = tep_find_field(event, "softexpires"); - if (field == NULL) { - /* Cannot find "softexpires" event specific field in the event */ - } else { - /* Get softexpires parameter from the data record */ - softexpires = tep_read_number(tep, record->data + field->offset, - field->size); - } - - field = tep_find_any_field(event, "mode"); - if (field == NULL) { - /* Cannot find "mode" field in the event */ - } else - { - /* Get mode parameter from the data record */ - mode = tep_read_number(tep, record->data + field->offset, - field->size); - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt deleted file mode 100644 index 6324f0d48aeb..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_any_field_val, tep_get_common_field_val, tep_get_field_val, -tep_get_field_raw - Get value of a field. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_); --- - -DESCRIPTION ------------ -These functions can be used to find a field and retrieve its value. - -The _tep_get_any_field_val()_ function searches in the _record_ for a field -with _name_, part of the _event_. If the field is found, its value is stored in -_val_. If there is an error and _err_ is not zero, then an error string is -written into _s_. - -The _tep_get_common_field_val()_ function does the same as -_tep_get_any_field_val()_, but searches only in the common fields. This works -for any event as all events include the common fields. - -The _tep_get_field_val()_ function does the same as _tep_get_any_field_val()_, -but searches only in the event specific fields. - -The _tep_get_field_raw()_ function searches in the _record_ for a field with -_name_, part of the _event_. If the field is found, a pointer to where the field -exists in the record's raw data is returned. The size of the data is stored in -_len_. If there is an error and _err_ is not zero, then an error string is -written into _s_. - -RETURN VALUE ------------- -The _tep_get_any_field_val()_, _tep_get_common_field_val()_ and -_tep_get_field_val()_ functions return 0 on success, or -1 in case of an error. - -The _tep_get_field_raw()_ function returns a pointer to field's raw data, and -places the length of this data in _len_. In case of an error NULL is returned. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -... -void process_record(struct tep_record *record) -{ - int len; - char *comm; - struct tep_event_format *event; - unsigned long long val; - - event = tep_find_event_by_record(pevent, record); - if (event != NULL) { - if (tep_get_common_field_val(NULL, event, "common_type", - record, &val, 0) == 0) { - /* Got the value of common type field */ - } - if (tep_get_field_val(NULL, event, "pid", record, &val, 0) == 0) { - /* Got the value of pid specific field */ - } - comm = tep_get_field_raw(NULL, event, "comm", record, &len, 0); - if (comm != NULL) { - /* Got a pointer to the comm event specific field */ - } - } -} --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt deleted file mode 100644 index 9a9df98ac44d..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt +++ /dev/null @@ -1,126 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_print_field, tep_print_fields, tep_print_num_field, tep_print_func_field - -Print the field content. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_); -void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_); -int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); -int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); --- - -DESCRIPTION ------------ -These functions print recorded field's data, according to the field's type. - -The _tep_print_field()_ function extracts from the recorded raw _data_ value of -the _field_ and prints it into _s_, according to the field type. - -The _tep_print_fields()_ prints each field name followed by the record's field -value according to the field's type: -[verse] --- -"field1_name=field1_value field2_name=field2_value ..." --- -It iterates all fields of the _event_, and calls _tep_print_field()_ for each of -them. - -The _tep_print_num_field()_ function prints a numeric field with given format -string. A search is performed in the _event_ for a field with _name_. If such -field is found, its value is extracted from the _record_ and is printed in the -_s_, according to the given format string _fmt_. If the argument _err_ is -non-zero, and an error occures - it is printed in the _s_. - -The _tep_print_func_field()_ function prints a function field with given format -string. A search is performed in the _event_ for a field with _name_. If such -field is found, its value is extracted from the _record_. The value is assumed -to be a function address, and a search is perform to find the name of this -function. The function name (if found) and its address are printed in the _s_, -according to the given format string _fmt_. If the argument _err_ is non-zero, -and an error occures - it is printed in _s_. - -RETURN VALUE ------------- -The _tep_print_num_field()_ and _tep_print_func_field()_ functions return 1 -on success, -1 in case of an error or 0 if the print buffer _s_ is full. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct trace_seq seq; -trace_seq_init(&seq); -struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); -... -void process_record(struct tep_record *record) -{ - struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid"); - - trace_seq_reset(&seq); - - /* Print the value of "common_pid" */ - tep_print_field(&seq, record->data, field_pid); - - /* Print all fields of the "hrtimer_start" event */ - tep_print_fields(&seq, record->data, record->size, event); - - /* Print the value of "expires" field with custom format string */ - tep_print_num_field(&seq, " timer expires in %llu ", event, "expires", record, 0); - - /* Print the address and the name of "function" field with custom format string */ - tep_print_func_field(&seq, " timer function is %s ", event, "function", record, 0); - } - ... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt deleted file mode 100644 index 64e9e25d3fd9..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt +++ /dev/null @@ -1,81 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_read_number_field - Reads a number from raw data. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_); --- - -DESCRIPTION ------------ -The _tep_read_number_field()_ function reads the value of the _field_ from the -raw _data_ and stores it in the _value_. The function sets the _value_ according -to the endianness of the raw data and the current machine and stores it in -_value_. - -RETURN VALUE ------------- -The _tep_read_number_field()_ function retunrs 0 in case of success, or -1 in -case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); -... -void process_record(struct tep_record *record) -{ - unsigned long long pid; - struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid"); - - if (tep_read_number_field(field_pid, record->data, &pid) != 0) { - /* Failed to get "common_pid" value */ - } -} -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt deleted file mode 100644 index 1ccb531d5114..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt +++ /dev/null @@ -1,105 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_event_common_fields, tep_event_fields - Get a list of fields for an event. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_); -struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_); --- - -DESCRIPTION ------------ -The _tep_event_common_fields()_ function returns an array of pointers to common -fields for the _event_. The array is allocated in the function and must be freed -by free(). The last element of the array is NULL. - -The _tep_event_fields()_ function returns an array of pointers to event specific -fields for the _event_. The array is allocated in the function and must be freed -by free(). The last element of the array is NULL. - -RETURN VALUE ------------- -Both _tep_event_common_fields()_ and _tep_event_fields()_ functions return -an array of pointers to tep_format_field structures in case of success, or -NULL in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -int i; -struct tep_format_field **fields; -struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -if (event != NULL) { - fields = tep_event_common_fields(event); - if (fields != NULL) { - i = 0; - while (fields[i]) { - /* - walk through the list of the common fields - of the kvm_exit event - */ - i++; - } - free(fields); - } - fields = tep_event_fields(event); - if (fields != NULL) { - i = 0; - while (fields[i]) { - /* - walk through the list of the event specific - fields of the kvm_exit event - */ - i++; - } - free(fields); - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt deleted file mode 100644 index f401ad311047..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt +++ /dev/null @@ -1,91 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_is_file_bigendian, tep_set_file_bigendian - Get / set the endianness of the -raw data being accessed by the tep handler. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_endian* { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; - -bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_); -void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - --- -DESCRIPTION ------------ -The _tep_is_file_bigendian()_ function gets the endianness of the raw data, -being accessed by the tep handler. The _tep_ argument is trace event parser -context. - -The _tep_set_file_bigendian()_ function sets the endianness of raw data being -accessed by the tep handler. The _tep_ argument is trace event parser context. -[verse] --- -The _endian_ argument is the endianness: - _TEP_LITTLE_ENDIAN_ - the raw data is in little endian format, - _TEP_BIG_ENDIAN_ - the raw data is in big endian format. --- -RETURN VALUE ------------- -The _tep_is_file_bigendian()_ function returns true if the data is in bigendian -format, false otherwise. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN); -... - if (tep_is_file_bigendian(tep)) { - /* The raw data is in big endian */ - } else { - /* The raw data is in little endian */ - } --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt deleted file mode 100644 index 4a9962d8cb59..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt +++ /dev/null @@ -1,209 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_filter_alloc, tep_filter_free, tep_filter_reset, tep_filter_make_string, -tep_filter_copy, tep_filter_compare, tep_filter_match, tep_event_filtered, -tep_filter_remove_event, tep_filter_strerror, tep_filter_add_filter_str - -Event filter related APIs. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_); -void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_); -void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_); -enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_); -int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_); -int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_); -int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); -char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_); --- - -DESCRIPTION ------------ -Filters can be attached to traced events. They can be used to filter out various -events when outputting them. Each event can be filtered based on its parameters, -described in the event's format file. This set of functions can be used to -create, delete, modify and attach event filters. - -The _tep_filter_alloc()_ function creates a new event filter. The _tep_ argument -is the trace event parser context. - -The _tep_filter_free()_ function frees an event filter and all resources that it -had used. - -The _tep_filter_reset()_ function removes all rules from an event filter and -resets it. - -The _tep_filter_add_filter_str()_ function adds a new rule to the _filter_. The -_filter_str_ argument is the filter string, that contains the rule. - -The _tep_event_filtered()_ function checks if the event with _event_id_ has -_filter_. - -The _tep_filter_remove_event()_ function removes a _filter_ for an event with -_event_id_. - -The _tep_filter_match()_ function tests if a _record_ matches given _filter_. - -The _tep_filter_copy()_ function copies a _source_ filter into a _dest_ filter. - -The _tep_filter_compare()_ function compares two filers - _filter1_ and _filter2_. - -The _tep_filter_make_string()_ function constructs a string, displaying -the _filter_ contents for given _event_id_. - -The _tep_filter_strerror()_ function copies the _filter_ error buffer into the -given _buf_ with the size _buflen_. If the error buffer is empty, in the _buf_ -is copied a string, describing the error _err_. - -RETURN VALUE ------------- -The _tep_filter_alloc()_ function returns a pointer to the newly created event -filter, or NULL in case of an error. - -The _tep_filter_add_filter_str()_ function returns 0 if the rule was -successfully added or a negative error code. Use _tep_filter_strerror()_ to see -actual error message in case of an error. - -The _tep_event_filtered()_ function returns 1 if the filter is found for given -event, or 0 otherwise. - -The _tep_filter_remove_event()_ function returns 1 if the vent was removed, or -0 if the event was not found. - -The _tep_filter_match()_ function returns _tep_errno_, according to the result: -[verse] --- -_pass:[TEP_ERRNO__FILTER_MATCH]_ - filter found for event, the record matches. -_pass:[TEP_ERRNO__FILTER_MISS]_ - filter found for event, the record does not match. -_pass:[TEP_ERRNO__FILTER_NOT_FOUND]_ - no filter found for record's event. -_pass:[TEP_ERRNO__NO_FILTER]_ - no rules in the filter. --- -or any other _tep_errno_, if an error occurred during the test. - -The _tep_filter_copy()_ function returns 0 on success or -1 if not all rules - were copied. - -The _tep_filter_compare()_ function returns 1 if the two filters hold the same -content, or 0 if they do not. - -The _tep_filter_make_string()_ function returns a string, which must be freed -with free(), or NULL in case of an error. - -The _tep_filter_strerror()_ function returns 0 if message was filled -successfully, or -1 in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char errstr[200]; -int ret; - -struct tep_event_filter *filter = tep_filter_alloc(tep); -struct tep_event_filter *filter1 = tep_filter_alloc(tep); -ret = tep_filter_add_filter_str(filter, "sched/sched_wakeup:target_cpu==1"); -if(ret < 0) { - tep_filter_strerror(filter, ret, errstr, sizeof(errstr)); - /* Failed to add a new rule to the filter, the error string is in errstr */ -} -if (tep_filter_copy(filter1, filter) != 0) { - /* Failed to copy filter in filter1 */ -} -... -if (tep_filter_compare(filter, filter1) != 1) { - /* Both filters are different */ -} -... -void process_record(struct tep_handle *tep, struct tep_record *record) -{ - struct tep_event *event; - char *fstring; - - event = tep_find_event_by_record(tep, record); - - if (tep_event_filtered(filter, event->id) == 1) { - /* The event has filter */ - fstring = tep_filter_make_string(filter, event->id); - if (fstring != NULL) { - /* The filter for the event is in fstring */ - free(fstring); - } - } - - switch (tep_filter_match(filter, record)) { - case TEP_ERRNO__FILTER_MATCH: - /* The filter matches the record */ - break; - case TEP_ERRNO__FILTER_MISS: - /* The filter does not match the record */ - break; - case TEP_ERRNO__FILTER_NOT_FOUND: - /* No filter found for record's event */ - break; - case TEP_ERRNO__NO_FILTER: - /* There are no rules in the filter */ - break - default: - /* An error occurred during the test */ - break; - } - - if (tep_filter_remove_event(filter, event->id) == 1) { - /* The event was removed from the filter */ - } -} - -... -tep_filter_reset(filter); -... -tep_filter_free(filter); -tep_filter_free(filter1); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt deleted file mode 100644 index f6aca0df2151..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt +++ /dev/null @@ -1,183 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_function, tep_find_function_address, tep_set_function_resolver, -tep_reset_function_resolver, tep_register_function, tep_register_print_string - -function related tep APIs - -SYNOPSIS --------- -[verse] --- -*#include * - -typedef char pass:[*](*tep_func_resolver_t*)(void pass:[*]_priv_, unsigned long long pass:[*]_addrp_, char pass:[**]_modp_); -int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_); -void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_); -const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); -int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_); --- - -DESCRIPTION ------------ -Some tools may have already a way to resolve the kernel functions. These APIs -allow them to keep using it instead of duplicating all the entries inside. - -The _tep_func_resolver_t_ type is the prototype of the alternative kernel -functions resolver. This function receives a pointer to its custom context -(set with the _tep_set_function_resolver()_ call ) and the address of a kernel -function, which has to be resolved. In case of success, it should return -the name of the function and its module (if any) in _modp_. - -The _tep_set_function_resolver()_ function registers _func_ as an alternative -kernel functions resolver. The _tep_ argument is trace event parser context. -The _priv_ argument is a custom context of the _func_ function. The function -resolver is used by the APIs _tep_find_function()_, -_tep_find_function_address()_, and _tep_print_func_field()_ to resolve -a function address to a function name. - -The _tep_reset_function_resolver()_ function resets the kernel functions -resolver to the default function. The _tep_ argument is trace event parser -context. - - -These APIs can be used to find function name and start address, by given -address. The given address does not have to be exact, it will select -the function that would contain it. - -The _tep_find_function()_ function returns the function name, which contains the -given address _addr_. The _tep_ argument is the trace event parser context. - -The _tep_find_function_address()_ function returns the function start address, -by given address _addr_. The _addr_ does not have to be exact, it will select -the function that would contain it. The _tep_ argument is the trace event -parser context. - -The _tep_register_function()_ function registers a function name mapped to an -address and (optional) module. This mapping is used in case the function tracer -or events have "%pS" parameter in its format string. It is common to pass in -the kallsyms function names with their corresponding addresses with this -function. The _tep_ argument is the trace event parser context. The _name_ is -the name of the function, the string is copied internally. The _addr_ is the -start address of the function. The _mod_ is the kernel module the function may -be in (NULL for none). - -The _tep_register_print_string()_ function registers a string by the address -it was stored in the kernel. Some strings internal to the kernel with static -address are passed to certain events. The "%s" in the event's format field -which has an address needs to know what string would be at that address. The -tep_register_print_string() supplies the parsing with the mapping between kernel -addresses and those strings. The _tep_ argument is the trace event parser -context. The _fmt_ is the string to register, it is copied internally. -The _addr_ is the address the string was located at. - - -RETURN VALUE ------------- -The _tep_set_function_resolver()_ function returns 0 in case of success, or -1 -in case of an error. - -The _tep_find_function()_ function returns the function name, or NULL in case -it cannot be found. - -The _tep_find_function_address()_ function returns the function start address, -or 0 in case it cannot be found. - -The _tep_register_function()_ function returns 0 in case of success. In case of -an error -1 is returned, and errno is set to the appropriate error number. - -The _tep_register_print_string()_ function returns 0 in case of success. In case -of an error -1 is returned, and errno is set to the appropriate error number. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char *my_resolve_kernel_addr(void *context, - unsigned long long *addrp, char **modp) -{ - struct db *function_database = context; - struct symbol *sym = sql_lookup(function_database, *addrp); - - if (!sym) - return NULL; - - *modp = sym->module_name; - return sym->name; -} - -void show_function( unsigned long long addr) -{ - unsigned long long fstart; - const char *fname; - - if (tep_set_function_resolver(tep, my_resolve_kernel_addr, - function_database) != 0) { - /* failed to register my_resolve_kernel_addr */ - } - - /* These APIs use my_resolve_kernel_addr() to resolve the addr */ - fname = tep_find_function(tep, addr); - fstart = tep_find_function_address(tep, addr); - - /* - addr is in function named fname, starting at fstart address, - at offset (addr - fstart) - */ - - tep_reset_function_resolver(tep); - -} -... - if (tep_register_function(tep, "kvm_exit", - (unsigned long long) 0x12345678, "kvm") != 0) { - /* Failed to register kvm_exit address mapping */ - } -... - if (tep_register_print_string(tep, "print string", - (unsigned long long) 0x87654321, NULL) != 0) { - /* Failed to register "print string" address mapping */ - } -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt deleted file mode 100644 index 04840e244445..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt +++ /dev/null @@ -1,88 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_function,tep_find_function_address - Find function name / start address. - -SYNOPSIS --------- -[verse] --- -*#include * - -const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); --- - -DESCRIPTION ------------ -These functions can be used to find function name and start address, by given -address. The given address does not have to be exact, it will select the function -that would contain it. - -The _tep_find_function()_ function returns the function name, which contains the -given address _addr_. The _tep_ argument is the trace event parser context. - -The _tep_find_function_address()_ function returns the function start address, -by given address _addr_. The _addr_ does not have to be exact, it will select the -function that would contain it. The _tep_ argument is the trace event parser context. - -RETURN VALUE ------------- -The _tep_find_function()_ function returns the function name, or NULL in case -it cannot be found. - -The _tep_find_function_address()_ function returns the function start address, -or 0 in case it cannot be found. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -void show_function( unsigned long long addr) -{ - const char *fname = tep_find_function(tep, addr); - unsigned long long fstart = tep_find_function_address(tep, addr); - - /* addr is in function named fname, starting at fstart address, at offset (addr - fstart) */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt deleted file mode 100644 index 45b20172e262..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt +++ /dev/null @@ -1,101 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage -references of trace event parser context. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_handle pass:[*]*tep_alloc*(void); -void *tep_free*(struct tep_handle pass:[*]_tep_); -void *tep_ref*(struct tep_handle pass:[*]_tep_); -void *tep_unref*(struct tep_handle pass:[*]_tep_); -int *tep_get_ref*(struct tep_handle pass:[*]_tep_); --- - -DESCRIPTION ------------ -These are the main functions to create and destroy tep_handle - the main -structure, representing the trace event parser context. This context is used as -the input parameter of most library APIs. - -The _tep_alloc()_ function allocates and initializes the tep context. - -The _tep_free()_ function will decrement the reference of the _tep_ handler. -When there is no more references, then it will free the handler, as well -as clean up all its resources that it had used. The argument _tep_ is -the pointer to the trace event parser context. - -The _tep_ref()_ function adds a reference to the _tep_ handler. - -The _tep_unref()_ function removes a reference from the _tep_ handler. When -the last reference is removed, the _tep_ is destroyed, and all resources that -it had used are cleaned up. - -The _tep_ref_get()_ functions gets the current references of the _tep_ handler. - -RETURN VALUE ------------- -_tep_alloc()_ returns a pointer to a newly created tep_handle structure. -NULL is returned in case there is not enough free memory to allocate it. - -_tep_ref_get()_ returns the current references of _tep_. -If _tep_ is NULL, 0 is returned. - -EXAMPLE -------- -[source,c] --- -#include - -... -struct tep_handle *tep = tep_alloc(); -... -int ref = tep_get_ref(tep); -tep_ref(tep); -if ( (ref+1) != tep_get_ref(tep)) { - /* Something wrong happened, the counter is not incremented by 1 */ -} -tep_unref(tep); -... -tep_free(tep); -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt deleted file mode 100644 index 615d117dc39f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt +++ /dev/null @@ -1,102 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_header_page_size, tep_get_header_timestamp_size, tep_is_old_format - -Get the data stored in the header page, in kernel context. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); -int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); -bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); --- -DESCRIPTION ------------ -These functions retrieve information from kernel context, stored in tracefs -events/header_page. Old kernels do not have header page info, so default values -from user space context are used. - -The _tep_get_header_page_size()_ function returns the size of a long integer, -in kernel context. The _tep_ argument is trace event parser context. -This information is retrieved from tracefs events/header_page, "commit" field. - -The _tep_get_header_timestamp_size()_ function returns the size of timestamps, -in kernel context. The _tep_ argument is trace event parser context. This -information is retrieved from tracefs events/header_page, "timestamp" field. - -The _tep_is_old_format()_ function returns true if the kernel predates -the addition of events/header_page, otherwise it returns false. - -RETURN VALUE ------------- -The _tep_get_header_page_size()_ function returns the size of a long integer, -in bytes. - -The _tep_get_header_timestamp_size()_ function returns the size of timestamps, -in bytes. - -The _tep_is_old_format()_ function returns true, if an old kernel is used to -generate the tracing data, which has no event/header_page. If the kernel is new, -or _tep_ is NULL, false is returned. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - int longsize; - int timesize; - bool old; - - longsize = tep_get_header_page_size(tep); - timesize = tep_get_header_timestamp_size(tep); - old = tep_is_old_format(tep); - - printf ("%s kernel is used to generate the tracing data.\n", - old?"Old":"New"); - printf("The size of a long integer is %d bytes.\n", longsize); - printf("The timestamps size is %d bytes.\n", timesize); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt deleted file mode 100644 index d5d375eb8d1e..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt +++ /dev/null @@ -1,104 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_is_bigendian, tep_is_local_bigendian, tep_set_local_bigendian - Get / set -the endianness of the local machine. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_endian* { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; - -int *tep_is_bigendian*(void); -bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_); -void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); --- - -DESCRIPTION ------------ - -The _tep_is_bigendian()_ gets the endianness of the machine, executing -the function. - -The _tep_is_local_bigendian()_ function gets the endianness of the local -machine, saved in the _tep_ handler. The _tep_ argument is the trace event -parser context. This API is a bit faster than _tep_is_bigendian()_, as it -returns cached endianness of the local machine instead of checking it each time. - -The _tep_set_local_bigendian()_ function sets the endianness of the local -machine in the _tep_ handler. The _tep_ argument is trace event parser context. -The _endian_ argument is the endianness: -[verse] --- - _TEP_LITTLE_ENDIAN_ - the machine is little endian, - _TEP_BIG_ENDIAN_ - the machine is big endian. --- - -RETURN VALUE ------------- -The _tep_is_bigendian()_ function returns non zero if the endianness of the -machine, executing the code, is big endian and zero otherwise. - -The _tep_is_local_bigendian()_ function returns true, if the endianness of the -local machine, saved in the _tep_ handler, is big endian, or false otherwise. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... - if (tep_is_bigendian()) - tep_set_local_bigendian(tep, TEP_BIG_ENDIAN); - else - tep_set_local_bigendian(tep, TEP_LITTLE_ENDIAN); -... - if (tep_is_local_bigendian(tep)) - printf("This machine you are running on is bigendian\n"); - else - printf("This machine you are running on is little endian\n"); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt deleted file mode 100644 index 01d78ea2519a..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt +++ /dev/null @@ -1,78 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_long_size, tep_set_long_size - Get / set the size of a long integer on -the machine, where the trace is generated, in bytes - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_); -void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); --- - -DESCRIPTION ------------ -The _tep_get_long_size()_ function returns the size of a long integer on the machine, -where the trace is generated. The _tep_ argument is trace event parser context. - -The _tep_set_long_size()_ function sets the size of a long integer on the machine, -where the trace is generated. The _tep_ argument is trace event parser context. -The _long_size_ is the size of a long integer, in bytes. - -RETURN VALUE ------------- -The _tep_get_long_size()_ function returns the size of a long integer on the machine, -where the trace is generated, in bytes. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -tep_set_long_size(tep, 4); -... -int long_size = tep_get_long_size(tep); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt deleted file mode 100644 index 452c0cfa1822..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt +++ /dev/null @@ -1,82 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_page_size, tep_set_page_size - Get / set the size of a memory page on -the machine, where the trace is generated - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); -void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); --- - -DESCRIPTION ------------ -The _tep_get_page_size()_ function returns the size of a memory page on -the machine, where the trace is generated. The _tep_ argument is trace -event parser context. - -The _tep_set_page_size()_ function stores in the _tep_ context the size of a -memory page on the machine, where the trace is generated. -The _tep_ argument is trace event parser context. -The _page_size_ argument is the size of a memory page, in bytes. - -RETURN VALUE ------------- -The _tep_get_page_size()_ function returns size of the memory page, in bytes. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... - int page_size = getpagesize(); - - tep_set_page_size(tep, page_size); - - printf("The page size for this machine is %d\n", tep_get_page_size(tep)); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt deleted file mode 100644 index f248114ca1ff..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt +++ /dev/null @@ -1,90 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_parse_event, tep_parse_format - Parse the event format information - -SYNOPSIS --------- -[verse] --- -*#include * - -enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); -enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); --- - -DESCRIPTION ------------ -The _tep_parse_event()_ function parses the event format and creates an event -structure to quickly parse raw data for a given event. The _tep_ argument is -the trace event parser context. The created event structure is stored in the -_tep_ context. The _buf_ argument is a buffer with _size_, where the event -format data is. The event format data can be taken from -tracefs/events/.../.../format files. The _sys_ argument is the system of -the event. - -The _tep_parse_format()_ function does the same as _tep_parse_event()_. The only -difference is in the extra _eventp_ argument, where the newly created event -structure is returned. - -RETURN VALUE ------------- -Both _tep_parse_event()_ and _tep_parse_format()_ functions return 0 on success, -or TEP_ERRNO__... in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char *buf; -int size; -struct tep_event *event = NULL; -buf = read_file("/sys/kernel/tracing/events/ftrace/print/format", &size); -if (tep_parse_event(tep, buf, size, "ftrace") != 0) { - /* Failed to parse the ftrace print format */ -} - -if (tep_parse_format(tep, &event, buf, size, "ftrace") != 0) { - /* Failed to parse the ftrace print format */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt deleted file mode 100644 index c90f16c7d8e6..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt +++ /dev/null @@ -1,82 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_parse_header_page - Parses the data stored in the header page. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_); --- - -DESCRIPTION ------------ -The _tep_parse_header_page()_ function parses the header page data from _buf_, -and initializes the _tep_, trace event parser context, with it. The buffer -_buf_ is with _size_, and is supposed to be copied from -tracefs/events/header_page. - -Some old kernels do not have header page info, in this case the -_tep_parse_header_page()_ function can be called with _size_ equal to 0. The -_tep_ context is initialized with default values. The _long_size_ can be used in -this use case, to set the size of a long integer to be used. - -RETURN VALUE ------------- -The _tep_parse_header_page()_ function returns 0 in case of success, or -1 -in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char *buf; -int size; -buf = read_file("/sys/kernel/tracing/events/header_page", &size); -if (tep_parse_header_page(tep, buf, size, sizeof(unsigned long)) != 0) { - /* Failed to parse the header page */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt deleted file mode 100644 index 4d6394397d92..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins. - -SYNOPSIS --------- -[verse] --- -*#include * - -struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); -void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); -void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_, - void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep, - const char pass:[*]path, - const char pass:[*]name, - void pass:[*]data), - void pass:[*]_data_); --- - -DESCRIPTION ------------ -The _tep_load_plugins()_ function loads all plugins, located in the plugin -directories. The _tep_ argument is trace event parser context. -The plugin directories are : -[verse] --- - - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST - - System's plugin directory, defined at the library compile time. It - depends on the library installation prefix and usually is - _(install_preffix)/lib/traceevent/plugins_ - - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ - - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST --- -Loading of plugins can be controlled by the _tep_flags_, using the -_tep_set_flag()_ API: -[verse] --- - _TEP_DISABLE_SYS_PLUGINS_ - do not load plugins, located in - the system's plugin directory. - _TEP_DISABLE_PLUGINS_ - do not load any plugins. --- -The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if -loading of all plugins is not the desired case. - -The _tep_unload_plugins()_ function unloads the plugins, previously loaded by -_tep_load_plugins()_. The _tep_ argument is trace event parser context. The -_plugin_list_ is the list of loaded plugins, returned by -the _tep_load_plugins()_ function. - -The _tep_load_plugins_hook_ function walks through all directories with plugins -and calls user specified _load_plugin()_ hook for each plugin file. Only files -with given _suffix_ are considered to be plugins. The _data_ is a user specified -context, passed to _load_plugin()_. Directories and the walk order are the same -as in _tep_load_plugins()_ API. - -RETURN VALUE ------------- -The _tep_load_plugins()_ function returns a list of successfully loaded plugins, -or NULL in case no plugins are loaded. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_plugin_list *plugins = tep_load_plugins(tep); -if (plugins == NULL) { - /* no plugins are loaded */ -} -... -tep_unload_plugins(plugins, tep); -... -void print_plugin(struct tep_handle *tep, const char *path, - const char *name, void *data) -{ - pritnf("Found libtraceevent plugin %s/%s\n", path, name); -} -... -tep_load_plugins_hook(tep, ".so", print_plugin, NULL); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt deleted file mode 100644 index e9a69116c78b..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt +++ /dev/null @@ -1,137 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_data_type, tep_data_pid,tep_data_preempt_count, tep_data_flags - -Extract common fields from a record. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *trace_flag_type* { - _TRACE_FLAG_IRQS_OFF_, - _TRACE_FLAG_IRQS_NOSUPPORT_, - _TRACE_FLAG_NEED_RESCHED_, - _TRACE_FLAG_HARDIRQ_, - _TRACE_FLAG_SOFTIRQ_, -}; - -int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); --- - -DESCRIPTION ------------ -This set of functions can be used to extract common fields from a record. - -The _tep_data_type()_ function gets the event id from the record _rec_. -It reads the "common_type" field. The _tep_ argument is the trace event parser -context. - -The _tep_data_pid()_ function gets the process id from the record _rec_. -It reads the "common_pid" field. The _tep_ argument is the trace event parser -context. - -The _tep_data_preempt_count()_ function gets the preemption count from the -record _rec_. It reads the "common_preempt_count" field. The _tep_ argument is -the trace event parser context. - -The _tep_data_flags()_ function gets the latency flags from the record _rec_. -It reads the "common_flags" field. The _tep_ argument is the trace event parser -context. Supported latency flags are: -[verse] --- - _TRACE_FLAG_IRQS_OFF_, Interrupts are disabled. - _TRACE_FLAG_IRQS_NOSUPPORT_, Reading IRQ flag is not supported by the architecture. - _TRACE_FLAG_NEED_RESCHED_, Task needs rescheduling. - _TRACE_FLAG_HARDIRQ_, Hard IRQ is running. - _TRACE_FLAG_SOFTIRQ_, Soft IRQ is running. --- - -RETURN VALUE ------------- -The _tep_data_type()_ function returns an integer, representing the event id. - -The _tep_data_pid()_ function returns an integer, representing the process id - -The _tep_data_preempt_count()_ function returns an integer, representing the -preemption count. - -The _tep_data_flags()_ function returns an integer, representing the latency -flags. Look at the _trace_flag_type_ enum for supported flags. - -All these functions in case of an error return a negative integer. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -void process_record(struct tep_record *record) -{ - int data; - - data = tep_data_type(tep, record); - if (data >= 0) { - /* Got the ID of the event */ - } - - data = tep_data_pid(tep, record); - if (data >= 0) { - /* Got the process ID */ - } - - data = tep_data_preempt_count(tep, record); - if (data >= 0) { - /* Got the preemption count */ - } - - data = tep_data_flags(tep, record); - if (data >= 0) { - /* Got the latency flags */ - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt deleted file mode 100644 index 53d37d72a1c1..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt +++ /dev/null @@ -1,156 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_event_handler, tep_unregister_event_handler - Register / -unregisters a callback function to parse an event information. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_reg_handler* { - _TEP_REGISTER_SUCCESS_, - _TEP_REGISTER_SUCCESS_OVERWRITE_, -}; - -int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); -int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); - -typedef int (*pass:[*]tep_event_handler_func*)(struct trace_seq pass:[*]s, struct tep_record pass:[*]record, struct tep_event pass:[*]event, void pass:[*]context); --- - -DESCRIPTION ------------ -The _tep_register_event_handler()_ function registers a handler function, -which is going to be called to parse the information for a given event. -The _tep_ argument is the trace event parser context. The _id_ argument is -the id of the event. The _sys_name_ argument is the name of the system, -the event belongs to. The _event_name_ argument is the name of the event. -If _id_ is >= 0, it is used to find the event, otherwise _sys_name_ and -_event_name_ are used. The _func_ is a pointer to the function, which is going -to be called to parse the event information. The _context_ argument is a pointer -to the context data, which will be passed to the _func_. If a handler function -for the same event is already registered, it will be overridden with the new -one. This mechanism allows a developer to override the parsing of a given event. -If for some reason the default print format is not sufficient, the developer -can register a function for an event to be used to parse the data instead. - -The _tep_unregister_event_handler()_ function unregisters the handler function, -previously registered with _tep_register_event_handler()_. The _tep_ argument -is the trace event parser context. The _id_, _sys_name_, _event_name_, _func_, -and _context_ are the same arguments, as when the callback function _func_ was -registered. - -The _tep_event_handler_func_ is the type of the custom event handler -function. The _s_ argument is the trace sequence, it can be used to create a -custom string, describing the event. A _record_ to get the event from is passed -as input parameter and also the _event_ - the handle to the record's event. The -_context_ is custom context, set when the custom event handler is registered. - -RETURN VALUE ------------- -The _tep_register_event_handler()_ function returns _TEP_REGISTER_SUCCESS_ -if the new handler is registered successfully or -_TEP_REGISTER_SUCCESS_OVERWRITE_ if an existing handler is overwritten. -If there is not enough memory to complete the registration, -TEP_ERRNO__MEM_ALLOC_FAILED is returned. - -The _tep_unregister_event_handler()_ function returns 0 if _func_ was removed -successful or, -1 if the event was not found. - -The _tep_event_handler_func_ should return -1 in case of an error, -or 0 otherwise. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -int timer_expire_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - trace_seq_printf(s, "hrtimer="); - - if (tep_print_num_field(s, "0x%llx", event, "timer", record, 0) == -1) - tep_print_num_field(s, "0x%llx", event, "hrtimer", record, 1); - - trace_seq_printf(s, " now="); - - tep_print_num_field(s, "%llu", event, "now", record, 1); - - tep_print_func_field(s, " function=%s", event, "function", record, 0); - - return 0; -} -... - int ret; - - ret = tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - if (ret < 0) { - char buf[32]; - - tep_strerror(tep, ret, buf, 32) - printf("Failed to register handler for hrtimer_expire_entry: %s\n", buf); - } else { - switch (ret) { - case TEP_REGISTER_SUCCESS: - printf ("Registered handler for hrtimer_expire_entry\n"); - break; - case TEP_REGISTER_SUCCESS_OVERWRITE: - printf ("Overwrote handler for hrtimer_expire_entry\n"); - break; - } - } -... - ret = tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - if ( ret ) - printf ("Failed to unregister handler for hrtimer_expire_entry\n"); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt deleted file mode 100644 index 708dce91ebd8..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt +++ /dev/null @@ -1,155 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_print_function,tep_unregister_print_function - -Registers / Unregisters a helper function. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_func_arg_type* { - TEP_FUNC_ARG_VOID, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_STRING, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_MAX_TYPES -}; - -typedef unsigned long long (*pass:[*]tep_func_handler*)(struct trace_seq pass:[*]s, unsigned long long pass:[*]args); - -int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._); -int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_); --- - -DESCRIPTION ------------ -Some events may have helper functions in the print format arguments. -This allows a plugin to dynamically create a way to process one of -these functions. - -The _tep_register_print_function()_ registers such helper function. The _tep_ -argument is the trace event parser context. The _func_ argument is a pointer -to the helper function. The _ret_type_ argument is the return type of the -helper function, value from the _tep_func_arg_type_ enum. The _name_ is the name -of the helper function, as seen in the print format arguments. The _..._ is a -variable list of _tep_func_arg_type_ enums, the _func_ function arguments. -This list must end with _TEP_FUNC_ARG_VOID_. See 'EXAMPLE' section. - -The _tep_unregister_print_function()_ unregisters a helper function, previously -registered with _tep_register_print_function()_. The _tep_ argument is the -trace event parser context. The _func_ and _name_ arguments are the same, used -when the helper function was registered. - -The _tep_func_handler_ is the type of the helper function. The _s_ argument is -the trace sequence, it can be used to create a custom string. -The _args_ is a list of arguments, defined when the helper function was -registered. - -RETURN VALUE ------------- -The _tep_register_print_function()_ function returns 0 in case of success. -In case of an error, TEP_ERRNO_... code is returned. - -The _tep_unregister_print_function()_ returns 0 in case of success, or -1 in -case of an error. - -EXAMPLE -------- -Some events have internal functions calls, that appear in the print format -output. For example "tracefs/events/i915/g4x_wm/format" has: -[source,c] --- -print fmt: "pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", - ((REC->pipe) + 'A'), REC->frame, REC->scanline, REC->primary, - REC->sprite, REC->cursor, yesno(REC->cxsr), REC->sr_plane, - REC->sr_cursor, REC->sr_fbc, yesno(REC->hpll), REC->hpll_plane, - REC->hpll_cursor, REC->hpll_fbc, yesno(REC->fbc) --- -Notice the call to function _yesno()_ in the print arguments. In the kernel -context, this function has the following implementation: -[source,c] --- -static const char *yesno(int x) -{ - static const char *yes = "yes"; - static const char *no = "no"; - - return x ? yes : no; -} --- -The user space event parser has no idea how to handle this _yesno()_ function. -The _tep_register_print_function()_ API can be used to register a user space -helper function, mapped to the kernel's _yesno()_: -[source,c] --- -#include -#include -... -struct tep_handle *tep = tep_alloc(); -... -static const char *yes_no_helper(int x) -{ - return x ? "yes" : "no"; -} -... - if ( tep_register_print_function(tep, - yes_no_helper, - TEP_FUNC_ARG_STRING, - "yesno", - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_VOID) != 0) { - /* Failed to register yes_no_helper function */ - } - -/* - Now, when the event parser encounters this yesno() function, it will know - how to handle it. -*/ -... - if (tep_unregister_print_function(tep, yes_no_helper, "yesno") != 0) { - /* Failed to unregister yes_no_helper function */ - } --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt deleted file mode 100644 index b0599780b9a6..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt +++ /dev/null @@ -1,104 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_set_flag, tep_clear_flag, tep_test_flag - -Manage flags of trace event parser context. - -SYNOPSIS --------- -[verse] --- -*#include * - -enum *tep_flag* { - _TEP_NSEC_OUTPUT_, - _TEP_DISABLE_SYS_PLUGINS_, - _TEP_DISABLE_PLUGINS_ -}; -void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); -void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); -bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); --- - -DESCRIPTION ------------ -Trace event parser context flags are defined in *enum tep_flag*: -[verse] --- -_TEP_NSEC_OUTPUT_ - print event's timestamp in nano seconds, instead of micro seconds. -_TEP_DISABLE_SYS_PLUGINS_ - disable plugins, located in system's plugin - directory. This directory is defined at library compile - time, and usually depends on library installation - prefix: (install_preffix)/lib/traceevent/plugins -_TEP_DISABLE_PLUGINS_ - disable all library plugins: - - in system's plugin directory - - in directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - - in user's home directory, _~/.traceevent/plugins_ --- -Note: plugin related flags must me set before calling _tep_load_plugins()_ API. - -The _tep_set_flag()_ function sets _flag_ to _tep_ context. - -The _tep_clear_flag()_ function clears _flag_ from _tep_ context. - -The _tep_test_flag()_ function tests if _flag_ is set to _tep_ context. - -RETURN VALUE ------------- -_tep_test_flag()_ function returns true if _flag_ is set, false otherwise. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -/* Print timestamps in nanoseconds */ -tep_set_flag(tep, TEP_NSEC_OUTPUT); -... -if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) { - /* print timestamps in nanoseconds */ -} else { - /* print timestamps in microseconds */ -} -... -/* Print timestamps in microseconds */ -tep_clear_flag(tep, TEP_NSEC_OUTPUT); -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt deleted file mode 100644 index ee4062a00c9f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt +++ /dev/null @@ -1,85 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_strerror - Returns a string describing regular errno and tep error number. - -SYNOPSIS --------- -[verse] --- -*#include * - -int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); - --- -DESCRIPTION ------------ -The _tep_strerror()_ function converts tep error number into a human -readable string. -The _tep_ argument is trace event parser context. The _errnum_ is a regular -errno, defined in errno.h, or a tep error number. The string, describing this -error number is copied in the _buf_ argument. The _buflen_ argument is -the size of the _buf_. - -It as a thread safe wrapper around strerror_r(). The library function has two -different behaviors - POSIX and GNU specific. The _tep_strerror()_ API always -behaves as the POSIX version - the error string is copied in the user supplied -buffer. - -RETURN VALUE ------------- -The _tep_strerror()_ function returns 0, if a valid _errnum_ is passed and the -string is copied into _buf_. If _errnum_ is not a valid error number, --1 is returned and _buf_ is not modified. - -EXAMPLE -------- -[source,c] --- -#include -... -struct tep_handle *tep = tep_alloc(); -... -char buf[32]; -char *pool = calloc(1, 128); -if (tep == NULL) { - tep_strerror(tep, TEP_ERRNO__MEM_ALLOC_FAILED, buf, 32); - printf ("The pool is not initialized, %s", buf); -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt deleted file mode 100644 index 8ac6aa174e12..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt +++ /dev/null @@ -1,158 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -trace_seq_init, trace_seq_destroy, trace_seq_reset, trace_seq_terminate, -trace_seq_putc, trace_seq_puts, trace_seq_printf, trace_seq_vprintf, -trace_seq_do_fprintf, trace_seq_do_printf - -Initialize / destroy a trace sequence. - -SYNOPSIS --------- -[verse] --- -*#include * -*#include * - -void *trace_seq_init*(struct trace_seq pass:[*]_s_); -void *trace_seq_destroy*(struct trace_seq pass:[*]_s_); -void *trace_seq_reset*(struct trace_seq pass:[*]_s_); -void *trace_seq_terminate*(struct trace_seq pass:[*]_s_); -int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_); -int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_); -int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, _..._); -int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_); -int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_); -int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_); --- - -DESCRIPTION ------------ -Trace sequences are used to allow a function to call several other functions -to create a string of data to use. - -The _trace_seq_init()_ function initializes the trace sequence _s_. - -The _trace_seq_destroy()_ function destroys the trace sequence _s_ and frees -all its resources that it had used. - -The _trace_seq_reset()_ function re-initializes the trace sequence _s_. All -characters already written in _s_ will be deleted. - -The _trace_seq_terminate()_ function terminates the trace sequence _s_. It puts -the null character pass:['\0'] at the end of the buffer. - -The _trace_seq_putc()_ function puts a single character _c_ in the trace -sequence _s_. - -The _trace_seq_puts()_ function puts a NULL terminated string _str_ in the -trace sequence _s_. - -The _trace_seq_printf()_ function puts a formated string _fmt _with -variable arguments _..._ in the trace sequence _s_. - -The _trace_seq_vprintf()_ function puts a formated string _fmt _with -list of arguments _args_ in the trace sequence _s_. - -The _trace_seq_do_printf()_ function prints the buffer of trace sequence _s_ to -the standard output stdout. - -The _trace_seq_do_fprintf()_ function prints the buffer of trace sequence _s_ -to the given file _fp_. - -RETURN VALUE ------------- -Both _trace_seq_putc()_ and _trace_seq_puts()_ functions return the number of -characters put in the trace sequence, or 0 in case of an error - -Both _trace_seq_printf()_ and _trace_seq_vprintf()_ functions return 0 if the -trace oversizes the buffer's free space, the number of characters printed, or -a negative value in case of an error. - -Both _trace_seq_do_printf()_ and _trace_seq_do_fprintf()_ functions return the -number of printed characters, or -1 in case of an error. - -EXAMPLE -------- -[source,c] --- -#include -#include -... -struct trace_seq seq; -trace_seq_init(&seq); -... -void foo_seq_print(struct trace_seq *tseq, char *format, ...) -{ - va_list ap; - va_start(ap, format); - if (trace_seq_vprintf(tseq, format, ap) <= 0) { - /* Failed to print in the trace sequence */ - } - va_end(ap); -} - -trace_seq_reset(&seq); - -char *str = " MAN page example"; -if (trace_seq_puts(&seq, str) != strlen(str)) { - /* Failed to put str in the trace sequence */ -} -if (trace_seq_putc(&seq, ':') != 1) { - /* Failed to put ':' in the trace sequence */ -} -if (trace_seq_printf(&seq, " trace sequence: %d", 1) <= 0) { - /* Failed to print in the trace sequence */ -} -foo_seq_print( &seq, " %d\n", 2); - -trace_seq_terminate(&seq); -... - -if (trace_seq_do_printf(&seq) < 0 ) { - /* Failed to print the sequence buffer to the standard output */ -} -FILE *fp = fopen("trace.txt", "w"); -if (trace_seq_do_fprintf(&seq, fp) < 0 ) [ - /* Failed to print the sequence buffer to the trace.txt file */ -} - -trace_seq_destroy(&seq); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt deleted file mode 100644 index d530a7ce8fb2..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent.txt +++ /dev/null @@ -1,192 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -libtraceevent - Linux kernel trace event library - -SYNOPSIS --------- -[verse] --- -*#include * - -Management of tep handler data structure and access of its members: - struct tep_handle pass:[*]*tep_alloc*(void); - void *tep_free*(struct tep_handle pass:[*]_tep_); - void *tep_ref*(struct tep_handle pass:[*]_tep_); - void *tep_unref*(struct tep_handle pass:[*]_tep_); - int *tep_get_ref*(struct tep_handle pass:[*]_tep_); - void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); - void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); - bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_); - int *tep_get_cpus*(struct tep_handle pass:[*]_tep_); - void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_); - int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_); - void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); - int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); - void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); - int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); - int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); - bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); - int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); - -Register / unregister APIs: - int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); - int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); - int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); - int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_); - int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._); - int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_); - -Plugins management: - struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); - void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); - char pass:[*]pass:[*]*tep_plugin_list_options*(void); - void *tep_plugin_free_options_list*(char pass:[*]pass:[*]_list_); - int *tep_plugin_add_options*(const char pass:[*]_name_, struct tep_plugin_option pass:[*]_options_); - void *tep_plugin_remove_options*(struct tep_plugin_option pass:[*]_options_); - void *tep_print_plugins*(struct trace_seq pass:[*]_s_, const char pass:[*]_prefix_, const char pass:[*]_suffix_, const struct tep_plugin_list pass:[*]_list_); - -Event related APIs: - struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_); - struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_); - int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); - struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._); - -Event finding: - struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); - struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_); - struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_); - -Parsing of event files: - int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_); - enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); - enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); - -APIs related to fields from event's format files: - struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_); - struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_); - void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_); - int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_); - -Event fields printing: - void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_); - void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_); - int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); - int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); - -Event fields finding: - struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); - struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_); - struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); - -Functions resolver: - int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_); - void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_); - const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); - unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); - -Filter management: - struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_); - enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_); - enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_); - int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_); - int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_); - void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_); - char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_); - int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); - -Parsing various data from the records: - int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - -Command and task related APIs: - const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_); - struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_); - int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); - int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); - bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_); - int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_); - -Endian related APIs: - int *tep_is_bigendian*(void); - unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_); - bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_); - void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_); - void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - -Trace sequences: -*#include * - void *trace_seq_init*(struct trace_seq pass:[*]_s_); - void *trace_seq_reset*(struct trace_seq pass:[*]_s_); - void *trace_seq_destroy*(struct trace_seq pass:[*]_s_); - int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, ...); - int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_); - int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_); - int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_); - void *trace_seq_terminate*(struct trace_seq pass:[*]_s_); - int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_); - int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_); --- - -DESCRIPTION ------------ -The libtraceevent(3) library provides APIs to access kernel tracepoint events, -located in the tracefs file system under the events directory. - -ENVIRONMENT ------------ -[verse] --- -TRACEEVENT_PLUGIN_DIR - Additional plugin directory. All shared object files, located in this directory will be loaded as traceevent plugins. --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* , author of *libtraceevent*. -*Tzvetomir Stoyanov* , author of this man page. --- -REPORTING BUGS --------------- -Report bugs to - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/manpage-1.72.xsl b/tools/lib/traceevent/Documentation/manpage-1.72.xsl deleted file mode 100644 index b4d315cb8c47..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-1.72.xsl +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - diff --git a/tools/lib/traceevent/Documentation/manpage-base.xsl b/tools/lib/traceevent/Documentation/manpage-base.xsl deleted file mode 100644 index a264fa616093..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-base.xsl +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - sp - - - - - - - - br - - - diff --git a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl deleted file mode 100644 index 608eb5df6281..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - fB - - - fR - - - diff --git a/tools/lib/traceevent/Documentation/manpage-normal.xsl b/tools/lib/traceevent/Documentation/manpage-normal.xsl deleted file mode 100644 index a48f5b11f3dc..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-normal.xsl +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - -\ -. - - diff --git a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl deleted file mode 100644 index a63c7632a87d..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile deleted file mode 100644 index 98dfd4badea3..000000000000 --- a/tools/lib/traceevent/Makefile +++ /dev/null @@ -1,300 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# trace-cmd version -EP_VERSION = 1 -EP_PATCHLEVEL = 1 -EP_EXTRAVERSION = 0 - -# file format version -FILE_VERSION = 6 - -MAKEFLAGS += --no-print-directory - - -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,NM,$(CROSS_COMPILE)nm) -$(call allow-override,PKG_CONFIG,pkg-config) - -EXT = -std=gnu99 -INSTALL = install - -# Use DESTDIR for installing into a different root directory. -# This is useful for building a package. The program will be -# installed in this directory as if it was the root directory. -# Then the build tool can move it later. -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) -ifeq ($(LP64), 1) - libdir_relative_temp = lib64 -else - libdir_relative_temp = lib -endif - -libdir_relative ?= $(libdir_relative_temp) -prefix ?= /usr/local -libdir = $(prefix)/$(libdir_relative) -man_dir = $(prefix)/share/man -man_dir_SQ = '$(subst ','\'',$(man_dir))' -pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \ - --variable pc_path pkg-config | tr ":" " ")) -includedir_relative = traceevent -includedir = $(prefix)/include/$(includedir_relative) -includedir_SQ = '$(subst ','\'',$(includedir))' - -export man_dir man_dir_SQ INSTALL -export DESTDIR DESTDIR_SQ -export EVENT_PARSE_VERSION - -include ../../scripts/Makefile.include - -# copy a bit from Linux kbuild - -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -export prefix libdir src obj - -# Shell quotes -libdir_SQ = $(subst ','\'',$(libdir)) -libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) - -CONFIG_INCLUDES = -CONFIG_LIBS = -CONFIG_FLAGS = - -VERSION = $(EP_VERSION) -PATCHLEVEL = $(EP_PATCHLEVEL) -EXTRAVERSION = $(EP_EXTRAVERSION) - -OBJ = $@ -N = - -EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) - -LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) -LIB_INSTALL = libtraceevent.a libtraceevent.so* -LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL)) - -INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -# Append required CFLAGS -override CFLAGS += -fPIC -override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) -override CFLAGS += $(udis86-flags) -D_GNU_SOURCE - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - -# Disable command line variables (CFLAGS) override from top -# level Makefile (perf), otherwise build Makefile will get -# the same command line setup. -MAKEOVERRIDES= - -export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj - -TE_IN := $(OUTPUT)libtraceevent-in.o -LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) - -CMD_TARGETS = $(LIB_TARGET) - -TARGETS = $(CMD_TARGETS) - -all: all_cmd plugins - -all_cmd: $(CMD_TARGETS) - -$(TE_IN): force - $(Q)$(MAKE) $(build)=libtraceevent - -$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) - $(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ - @ln -sf $(@F) $(OUTPUT)libtraceevent.so - @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION) - -$(OUTPUT)libtraceevent.a: $(TE_IN) - $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ - -$(OUTPUT)%.so: $(OUTPUT)%-in.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ - -define make_version.h - (echo '/* This file is automatically generated. Do not modify. */'; \ - echo \#define VERSION_CODE $(shell \ - expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ - echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ - echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ - echo '#define FILE_VERSION '$(FILE_VERSION); \ - ) > $1 -endef - -define update_version.h - ($(call make_version.h, $@.tmp); \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -ep_version.h: force - $(Q)$(N)$(call update_version.h) - -VERSION_FILES = ep_version.h - -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -tags: force - $(RM) tags - find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ - --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' - -TAGS: force - $(RM) TAGS - find . -name '*.[ch]' | xargs etags \ - --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' - -define do_install_mkdir - if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ - fi -endef - -define do_install - $(call do_install_mkdir,$2); \ - $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' -endef - -PKG_CONFIG_SOURCE_FILE = libtraceevent.pc -PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE)) -define do_install_pkgconfig_file - if [ -n "${pkgconfig_dir}" ]; then \ - cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \ - sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|HEADER_DIR|$(includedir)|g" ${PKG_CONFIG_FILE}; \ - $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \ - else \ - (echo Failed to locate pkg-config directory) 1>&2; \ - fi -endef - -install_lib: all_cmd install_plugins install_headers install_pkgconfig - $(call QUIET_INSTALL, $(LIB_TARGET)) \ - $(call do_install_mkdir,$(libdir_SQ)); \ - cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) - -install_pkgconfig: - $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \ - $(call do_install_pkgconfig_file,$(prefix)) - -install_headers: - $(call QUIET_INSTALL, traceevent_headers) \ - $(call do_install,event-parse.h,$(includedir_SQ),644); \ - $(call do_install,event-utils.h,$(includedir_SQ),644); \ - $(call do_install,trace-seq.h,$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(includedir_SQ),644); - -install: install_lib - -clean: clean_plugins - $(call QUIET_CLEAN, libtraceevent) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ - $(RM) TRACEEVENT-CFLAGS tags TAGS; \ - $(RM) $(PKG_CONFIG_FILE) - -PHONY += doc -doc: - $(call descend,Documentation) - -PHONY += doc-clean -doc-clean: - $(call descend,Documentation,clean) - -PHONY += doc-install -doc-install: - $(call descend,Documentation,install) - -PHONY += doc-uninstall -doc-uninstall: - $(call descend,Documentation,uninstall) - -PHONY += help -help: - @echo 'Possible targets:' - @echo'' - @echo ' all - default, compile the library and the'\ - 'plugins' - @echo ' plugins - compile the plugins' - @echo ' install - install the library, the plugins,'\ - 'the header and pkgconfig files' - @echo ' clean - clean the library and the plugins object files' - @echo ' doc - compile the documentation files - man'\ - 'and html pages, in the Documentation directory' - @echo ' doc-clean - clean the documentation files' - @echo ' doc-install - install the man pages' - @echo ' doc-uninstall - uninstall the man pages' - @echo'' - -PHONY += plugins -plugins: - $(call descend,plugins) - -PHONY += install_plugins -install_plugins: - $(call descend,plugins,install) - -PHONY += clean_plugins -clean_plugins: - $(call descend,plugins,clean) - -force: - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable so we can use it in if_changed and friends. -.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c deleted file mode 100644 index f8361e45d446..000000000000 --- a/tools/lib/traceevent/event-parse-api.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ - -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" - -/** - * tep_get_event - returns the event with the given index - * @tep: a handle to the tep_handle - * @index: index of the requested event, in the range 0 .. nr_events - * - * This returns pointer to the element of the events array with the given index - * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned. - */ -struct tep_event *tep_get_event(struct tep_handle *tep, int index) -{ - if (tep && tep->events && index < tep->nr_events) - return tep->events[index]; - - return NULL; -} - -/** - * tep_get_first_event - returns the first event in the events array - * @tep: a handle to the tep_handle - * - * This returns pointer to the first element of the events array - * If @tep is NULL, NULL is returned. - */ -struct tep_event *tep_get_first_event(struct tep_handle *tep) -{ - return tep_get_event(tep, 0); -} - -/** - * tep_get_events_count - get the number of defined events - * @tep: a handle to the tep_handle - * - * This returns number of elements in event array - * If @tep is NULL, 0 is returned. - */ -int tep_get_events_count(struct tep_handle *tep) -{ - if (tep) - return tep->nr_events; - return 0; -} - -/** - * tep_set_flag - set event parser flag - * @tep: a handle to the tep_handle - * @flag: flag, or combination of flags to be set - * can be any combination from enum tep_flag - * - * This sets a flag or combination of flags from enum tep_flag - */ -void tep_set_flag(struct tep_handle *tep, int flag) -{ - if (tep) - tep->flags |= flag; -} - -/** - * tep_clear_flag - clear event parser flag - * @tep: a handle to the tep_handle - * @flag: flag to be cleared - * - * This clears a tep flag - */ -void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag) -{ - if (tep) - tep->flags &= ~flag; -} - -/** - * tep_test_flag - check the state of event parser flag - * @tep: a handle to the tep_handle - * @flag: flag to be checked - * - * This returns the state of the requested tep flag. - * Returns: true if the flag is set, false otherwise. - */ -bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) -{ - if (tep) - return tep->flags & flag; - return false; -} - -__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data) -{ - unsigned short swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 8) | - ((data & (0xffULL << 8)) >> 8); - - return swap; -} - -__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data) -{ - unsigned int swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 24) | - ((data & (0xffULL << 8)) << 8) | - ((data & (0xffULL << 16)) >> 8) | - ((data & (0xffULL << 24)) >> 24); - - return swap; -} - -__hidden unsigned long long -data2host8(struct tep_handle *tep, unsigned long long data) -{ - unsigned long long swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 56) | - ((data & (0xffULL << 8)) << 40) | - ((data & (0xffULL << 16)) << 24) | - ((data & (0xffULL << 24)) << 8) | - ((data & (0xffULL << 32)) >> 8) | - ((data & (0xffULL << 40)) >> 24) | - ((data & (0xffULL << 48)) >> 40) | - ((data & (0xffULL << 56)) >> 56); - - return swap; -} - -/** - * tep_get_header_page_size - get size of the header page - * @tep: a handle to the tep_handle - * - * This returns size of the header page - * If @tep is NULL, 0 is returned. - */ -int tep_get_header_page_size(struct tep_handle *tep) -{ - if (tep) - return tep->header_page_size_size; - return 0; -} - -/** - * tep_get_header_timestamp_size - get size of the timestamp in the header page - * @tep: a handle to the tep_handle - * - * This returns size of the timestamp in the header page - * If @tep is NULL, 0 is returned. - */ -int tep_get_header_timestamp_size(struct tep_handle *tep) -{ - if (tep) - return tep->header_page_ts_size; - return 0; -} - -/** - * tep_get_cpus - get the number of CPUs - * @tep: a handle to the tep_handle - * - * This returns the number of CPUs - * If @tep is NULL, 0 is returned. - */ -int tep_get_cpus(struct tep_handle *tep) -{ - if (tep) - return tep->cpus; - return 0; -} - -/** - * tep_set_cpus - set the number of CPUs - * @tep: a handle to the tep_handle - * - * This sets the number of CPUs - */ -void tep_set_cpus(struct tep_handle *tep, int cpus) -{ - if (tep) - tep->cpus = cpus; -} - -/** - * tep_get_long_size - get the size of a long integer on the traced machine - * @tep: a handle to the tep_handle - * - * This returns the size of a long integer on the traced machine - * If @tep is NULL, 0 is returned. - */ -int tep_get_long_size(struct tep_handle *tep) -{ - if (tep) - return tep->long_size; - return 0; -} - -/** - * tep_set_long_size - set the size of a long integer on the traced machine - * @tep: a handle to the tep_handle - * @size: size, in bytes, of a long integer - * - * This sets the size of a long integer on the traced machine - */ -void tep_set_long_size(struct tep_handle *tep, int long_size) -{ - if (tep) - tep->long_size = long_size; -} - -/** - * tep_get_page_size - get the size of a memory page on the traced machine - * @tep: a handle to the tep_handle - * - * This returns the size of a memory page on the traced machine - * If @tep is NULL, 0 is returned. - */ -int tep_get_page_size(struct tep_handle *tep) -{ - if (tep) - return tep->page_size; - return 0; -} - -/** - * tep_set_page_size - set the size of a memory page on the traced machine - * @tep: a handle to the tep_handle - * @_page_size: size of a memory page, in bytes - * - * This sets the size of a memory page on the traced machine - */ -void tep_set_page_size(struct tep_handle *tep, int _page_size) -{ - if (tep) - tep->page_size = _page_size; -} - -/** - * tep_is_file_bigendian - return the endian of the file - * @tep: a handle to the tep_handle - * - * This returns true if the file is in big endian order - * If @tep is NULL, false is returned. - */ -bool tep_is_file_bigendian(struct tep_handle *tep) -{ - if (tep) - return (tep->file_bigendian == TEP_BIG_ENDIAN); - return false; -} - -/** - * tep_set_file_bigendian - set if the file is in big endian order - * @tep: a handle to the tep_handle - * @endian: non zero, if the file is in big endian order - * - * This sets if the file is in big endian order - */ -void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian) -{ - if (tep) - tep->file_bigendian = endian; -} - -/** - * tep_is_local_bigendian - return the endian of the saved local machine - * @tep: a handle to the tep_handle - * - * This returns true if the saved local machine in @tep is big endian. - * If @tep is NULL, false is returned. - */ -bool tep_is_local_bigendian(struct tep_handle *tep) -{ - if (tep) - return (tep->host_bigendian == TEP_BIG_ENDIAN); - return 0; -} - -/** - * tep_set_local_bigendian - set the stored local machine endian order - * @tep: a handle to the tep_handle - * @endian: non zero, if the local host has big endian order - * - * This sets the endian order for the local machine. - */ -void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian) -{ - if (tep) - tep->host_bigendian = endian; -} - -/** - * tep_is_old_format - get if an old kernel is used - * @tep: a handle to the tep_handle - * - * This returns true, if an old kernel is used to generate the tracing events or - * false if a new kernel is used. Old kernels did not have header page info. - * If @tep is NULL, false is returned. - */ -bool tep_is_old_format(struct tep_handle *tep) -{ - if (tep) - return tep->old_format; - return false; -} - -/** - * tep_set_test_filters - set a flag to test a filter string - * @tep: a handle to the tep_handle - * @test_filters: the new value of the test_filters flag - * - * This sets a flag to test a filter string. If this flag is set, when - * tep_filter_add_filter_str() API as called,it will print the filter string - * instead of adding it. - */ -void tep_set_test_filters(struct tep_handle *tep, int test_filters) -{ - if (tep) - tep->test_filters = test_filters; -} diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h deleted file mode 100644 index fd4bbcfbb849..000000000000 --- a/tools/lib/traceevent/event-parse-local.h +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ - -#ifndef _PARSE_EVENTS_INT_H -#define _PARSE_EVENTS_INT_H - -struct tep_cmdline; -struct cmdline_list; -struct func_map; -struct func_list; -struct event_handler; -struct func_resolver; -struct tep_plugins_dir; - -#define __hidden __attribute__((visibility ("hidden"))) - -struct tep_handle { - int ref_count; - - int header_page_ts_offset; - int header_page_ts_size; - int header_page_size_offset; - int header_page_size_size; - int header_page_data_offset; - int header_page_data_size; - int header_page_overwrite; - - enum tep_endian file_bigendian; - enum tep_endian host_bigendian; - - int old_format; - - int cpus; - int long_size; - int page_size; - - struct tep_cmdline *cmdlines; - struct cmdline_list *cmdlist; - int cmdline_count; - - struct func_map *func_map; - struct func_resolver *func_resolver; - struct func_list *funclist; - unsigned int func_count; - - struct printk_map *printk_map; - struct printk_list *printklist; - unsigned int printk_count; - - struct tep_event **events; - int nr_events; - struct tep_event **sort_events; - enum tep_event_sort_type last_type; - - int type_offset; - int type_size; - - int pid_offset; - int pid_size; - - int pc_offset; - int pc_size; - - int flags_offset; - int flags_size; - - int ld_offset; - int ld_size; - - int test_filters; - - int flags; - - struct tep_format_field *bprint_ip_field; - struct tep_format_field *bprint_fmt_field; - struct tep_format_field *bprint_buf_field; - - struct event_handler *handlers; - struct tep_function_handler *func_handlers; - - /* cache */ - struct tep_event *last_event; - - struct tep_plugins_dir *plugins_dir; -}; - -enum tep_print_parse_type { - PRINT_FMT_STRING, - PRINT_FMT_ARG_DIGIT, - PRINT_FMT_ARG_POINTER, - PRINT_FMT_ARG_STRING, -}; - -struct tep_print_parse { - struct tep_print_parse *next; - - char *format; - int ls; - enum tep_print_parse_type type; - struct tep_print_arg *arg; - struct tep_print_arg *len_as_arg; -}; - -void free_tep_event(struct tep_event *event); -void free_tep_format_field(struct tep_format_field *field); -void free_tep_plugin_paths(struct tep_handle *tep); - -unsigned short data2host2(struct tep_handle *tep, unsigned short data); -unsigned int data2host4(struct tep_handle *tep, unsigned int data); -unsigned long long data2host8(struct tep_handle *tep, unsigned long long data); - -/* access to the internal parser */ -int peek_char(void); -void init_input_buf(const char *buf, unsigned long long size); -unsigned long long get_input_buf_ptr(void); -const char *get_input_buf(void); -enum tep_event_type read_token(char **tok); -void free_token(char *tok); - -#endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c deleted file mode 100644 index 8e24c4c78c7f..000000000000 --- a/tools/lib/traceevent/event-parse.c +++ /dev/null @@ -1,7624 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - * - * The parts for function graph printing was taken and modified from the - * Linux Kernel that were written by - * - Copyright (C) 2009 Frederic Weisbecker, - * Frederic Weisbecker gave his permission to relicense the code to - * the Lesser General Public License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "event-parse.h" - -#include "event-parse-local.h" -#include "event-utils.h" -#include "trace-seq.h" - -static const char *input_buf; -static unsigned long long input_buf_ptr; -static unsigned long long input_buf_siz; - -static int is_flag_field; -static int is_symbolic_field; - -static int show_warning = 1; - -#define do_warning(fmt, ...) \ - do { \ - if (show_warning) \ - warning(fmt, ##__VA_ARGS__); \ - } while (0) - -#define do_warning_event(event, fmt, ...) \ - do { \ - if (!show_warning) \ - continue; \ - \ - if (event) \ - warning("[%s:%s] " fmt, event->system, \ - event->name, ##__VA_ARGS__); \ - else \ - warning(fmt, ##__VA_ARGS__); \ - } while (0) - -/** - * init_input_buf - init buffer for parsing - * @buf: buffer to parse - * @size: the size of the buffer - * - * Initializes the internal buffer that tep_read_token() will parse. - */ -__hidden void init_input_buf(const char *buf, unsigned long long size) -{ - input_buf = buf; - input_buf_siz = size; - input_buf_ptr = 0; -} - -__hidden const char *get_input_buf(void) -{ - return input_buf; -} - -__hidden unsigned long long get_input_buf_ptr(void) -{ - return input_buf_ptr; -} - -struct event_handler { - struct event_handler *next; - int id; - const char *sys_name; - const char *event_name; - tep_event_handler_func func; - void *context; -}; - -struct func_params { - struct func_params *next; - enum tep_func_arg_type type; -}; - -struct tep_function_handler { - struct tep_function_handler *next; - enum tep_func_arg_type ret_type; - char *name; - tep_func_handler func; - struct func_params *params; - int nr_args; -}; - -static unsigned long long -process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg); - -static void free_func_handle(struct tep_function_handler *func); - -void breakpoint(void) -{ - static int x; - x++; -} - -static struct tep_print_arg *alloc_arg(void) -{ - return calloc(1, sizeof(struct tep_print_arg)); -} - -struct tep_cmdline { - char *comm; - int pid; -}; - -static int cmdline_cmp(const void *a, const void *b) -{ - const struct tep_cmdline *ca = a; - const struct tep_cmdline *cb = b; - - if (ca->pid < cb->pid) - return -1; - if (ca->pid > cb->pid) - return 1; - - return 0; -} - -/* Looking for where to place the key */ -static int cmdline_slot_cmp(const void *a, const void *b) -{ - const struct tep_cmdline *ca = a; - const struct tep_cmdline *cb = b; - const struct tep_cmdline *cb1 = cb + 1; - - if (ca->pid < cb->pid) - return -1; - - if (ca->pid > cb->pid) { - if (ca->pid <= cb1->pid) - return 0; - return 1; - } - - return 0; -} - -struct cmdline_list { - struct cmdline_list *next; - char *comm; - int pid; -}; - -static int cmdline_init(struct tep_handle *tep) -{ - struct cmdline_list *cmdlist = tep->cmdlist; - struct cmdline_list *item; - struct tep_cmdline *cmdlines; - int i; - - cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count); - if (!cmdlines) - return -1; - - i = 0; - while (cmdlist) { - cmdlines[i].pid = cmdlist->pid; - cmdlines[i].comm = cmdlist->comm; - i++; - item = cmdlist; - cmdlist = cmdlist->next; - free(item); - } - - qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - - tep->cmdlines = cmdlines; - tep->cmdlist = NULL; - - return 0; -} - -static const char *find_cmdline(struct tep_handle *tep, int pid) -{ - const struct tep_cmdline *comm; - struct tep_cmdline key; - - if (!pid) - return ""; - - if (!tep->cmdlines && cmdline_init(tep)) - return ""; - - key.pid = pid; - - comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - - if (comm) - return comm->comm; - return "<...>"; -} - -/** - * tep_is_pid_registered - return if a pid has a cmdline registered - * @tep: a handle to the trace event parser context - * @pid: The pid to check if it has a cmdline registered with. - * - * Returns true if the pid has a cmdline mapped to it - * false otherwise. - */ -bool tep_is_pid_registered(struct tep_handle *tep, int pid) -{ - const struct tep_cmdline *comm; - struct tep_cmdline key; - - if (!pid) - return true; - - if (!tep->cmdlines && cmdline_init(tep)) - return false; - - key.pid = pid; - - comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - - if (comm) - return true; - return false; -} - -/* - * If the command lines have been converted to an array, then - * we must add this pid. This is much slower than when cmdlines - * are added before the array is initialized. - */ -static int add_new_comm(struct tep_handle *tep, - const char *comm, int pid, bool override) -{ - struct tep_cmdline *cmdlines = tep->cmdlines; - struct tep_cmdline *cmdline; - struct tep_cmdline key; - char *new_comm; - int cnt; - - if (!pid) - return 0; - - /* avoid duplicates */ - key.pid = pid; - - cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - if (cmdline) { - if (!override) { - errno = EEXIST; - return -1; - } - new_comm = strdup(comm); - if (!new_comm) { - errno = ENOMEM; - return -1; - } - free(cmdline->comm); - cmdline->comm = new_comm; - - return 0; - } - - cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1)); - if (!cmdlines) { - errno = ENOMEM; - return -1; - } - tep->cmdlines = cmdlines; - - key.comm = strdup(comm); - if (!key.comm) { - errno = ENOMEM; - return -1; - } - - if (!tep->cmdline_count) { - /* no entries yet */ - tep->cmdlines[0] = key; - tep->cmdline_count++; - return 0; - } - - /* Now find where we want to store the new cmdline */ - cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1, - sizeof(*tep->cmdlines), cmdline_slot_cmp); - - cnt = tep->cmdline_count; - if (cmdline) { - /* cmdline points to the one before the spot we want */ - cmdline++; - cnt -= cmdline - tep->cmdlines; - - } else { - /* The new entry is either before or after the list */ - if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) { - tep->cmdlines[tep->cmdline_count++] = key; - return 0; - } - cmdline = &tep->cmdlines[0]; - } - memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline))); - *cmdline = key; - - tep->cmdline_count++; - - return 0; -} - -static int _tep_register_comm(struct tep_handle *tep, - const char *comm, int pid, bool override) -{ - struct cmdline_list *item; - - if (tep->cmdlines) - return add_new_comm(tep, comm, pid, override); - - item = malloc(sizeof(*item)); - if (!item) - return -1; - - if (comm) - item->comm = strdup(comm); - else - item->comm = strdup("<...>"); - if (!item->comm) { - free(item); - return -1; - } - item->pid = pid; - item->next = tep->cmdlist; - - tep->cmdlist = item; - tep->cmdline_count++; - - return 0; -} - -/** - * tep_register_comm - register a pid / comm mapping - * @tep: a handle to the trace event parser context - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. If a command with the same pid - * already exist, -1 is returned and errno is set to EEXIST - */ -int tep_register_comm(struct tep_handle *tep, const char *comm, int pid) -{ - return _tep_register_comm(tep, comm, pid, false); -} - -/** - * tep_override_comm - register a pid / comm mapping - * @tep: a handle to the trace event parser context - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. If a command with the same pid - * already exist, the command string is udapted with the new one - */ -int tep_override_comm(struct tep_handle *tep, const char *comm, int pid) -{ - if (!tep->cmdlines && cmdline_init(tep)) { - errno = ENOMEM; - return -1; - } - return _tep_register_comm(tep, comm, pid, true); -} - -struct func_map { - unsigned long long addr; - char *func; - char *mod; -}; - -struct func_list { - struct func_list *next; - unsigned long long addr; - char *func; - char *mod; -}; - -static int func_cmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if (fa->addr < fb->addr) - return -1; - if (fa->addr > fb->addr) - return 1; - - return 0; -} - -/* - * We are searching for a record in between, not an exact - * match. - */ -static int func_bcmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if ((fa->addr == fb->addr) || - - (fa->addr > fb->addr && - fa->addr < (fb+1)->addr)) - return 0; - - if (fa->addr < fb->addr) - return -1; - - return 1; -} - -static int func_map_init(struct tep_handle *tep) -{ - struct func_list *funclist; - struct func_list *item; - struct func_map *func_map; - int i; - - func_map = malloc(sizeof(*func_map) * (tep->func_count + 1)); - if (!func_map) - return -1; - - funclist = tep->funclist; - - i = 0; - while (funclist) { - func_map[i].func = funclist->func; - func_map[i].addr = funclist->addr; - func_map[i].mod = funclist->mod; - i++; - item = funclist; - funclist = funclist->next; - free(item); - } - - qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp); - - /* - * Add a special record at the end. - */ - func_map[tep->func_count].func = NULL; - func_map[tep->func_count].addr = 0; - func_map[tep->func_count].mod = NULL; - - tep->func_map = func_map; - tep->funclist = NULL; - - return 0; -} - -static struct func_map * -__find_func(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *func; - struct func_map key; - - if (!tep->func_map) - func_map_init(tep); - - key.addr = addr; - - func = bsearch(&key, tep->func_map, tep->func_count, - sizeof(*tep->func_map), func_bcmp); - - return func; -} - -struct func_resolver { - tep_func_resolver_t *func; - void *priv; - struct func_map map; -}; - -/** - * tep_set_function_resolver - set an alternative function resolver - * @tep: a handle to the trace event parser context - * @resolver: function to be used - * @priv: resolver function private state. - * - * Some tools may have already a way to resolve kernel functions, allow them to - * keep using it instead of duplicating all the entries inside tep->funclist. - */ -int tep_set_function_resolver(struct tep_handle *tep, - tep_func_resolver_t *func, void *priv) -{ - struct func_resolver *resolver = malloc(sizeof(*resolver)); - - if (resolver == NULL) - return -1; - - resolver->func = func; - resolver->priv = priv; - - free(tep->func_resolver); - tep->func_resolver = resolver; - - return 0; -} - -/** - * tep_reset_function_resolver - reset alternative function resolver - * @tep: a handle to the trace event parser context - * - * Stop using whatever alternative resolver was set, use the default - * one instead. - */ -void tep_reset_function_resolver(struct tep_handle *tep) -{ - free(tep->func_resolver); - tep->func_resolver = NULL; -} - -static struct func_map * -find_func(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - if (!tep->func_resolver) - return __find_func(tep, addr); - - map = &tep->func_resolver->map; - map->mod = NULL; - map->addr = addr; - map->func = tep->func_resolver->func(tep->func_resolver->priv, - &map->addr, &map->mod); - if (map->func == NULL) - return NULL; - - return map; -} - -/** - * tep_find_function - find a function by a given address - * @tep: a handle to the trace event parser context - * @addr: the address to find the function with - * - * Returns a pointer to the function stored that has the given - * address. Note, the address does not have to be exact, it - * will select the function that would contain the address. - */ -const char *tep_find_function(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - map = find_func(tep, addr); - if (!map) - return NULL; - - return map->func; -} - -/** - * tep_find_function_address - find a function address by a given address - * @tep: a handle to the trace event parser context - * @addr: the address to find the function with - * - * Returns the address the function starts at. This can be used in - * conjunction with tep_find_function to print both the function - * name and the function offset. - */ -unsigned long long -tep_find_function_address(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - map = find_func(tep, addr); - if (!map) - return 0; - - return map->addr; -} - -/** - * tep_register_function - register a function with a given address - * @tep: a handle to the trace event parser context - * @function: the function name to register - * @addr: the address the function starts at - * @mod: the kernel module the function may be in (NULL for none) - * - * This registers a function name with an address and module. - * The @func passed in is duplicated. - */ -int tep_register_function(struct tep_handle *tep, char *func, - unsigned long long addr, char *mod) -{ - struct func_list *item = malloc(sizeof(*item)); - - if (!item) - return -1; - - item->next = tep->funclist; - item->func = strdup(func); - if (!item->func) - goto out_free; - - if (mod) { - item->mod = strdup(mod); - if (!item->mod) - goto out_free_func; - } else - item->mod = NULL; - item->addr = addr; - - tep->funclist = item; - tep->func_count++; - - return 0; - -out_free_func: - free(item->func); - item->func = NULL; -out_free: - free(item); - errno = ENOMEM; - return -1; -} - -/** - * tep_print_funcs - print out the stored functions - * @tep: a handle to the trace event parser context - * - * This prints out the stored functions. - */ -void tep_print_funcs(struct tep_handle *tep) -{ - int i; - - if (!tep->func_map) - func_map_init(tep); - - for (i = 0; i < (int)tep->func_count; i++) { - printf("%016llx %s", - tep->func_map[i].addr, - tep->func_map[i].func); - if (tep->func_map[i].mod) - printf(" [%s]\n", tep->func_map[i].mod); - else - printf("\n"); - } -} - -struct printk_map { - unsigned long long addr; - char *printk; -}; - -struct printk_list { - struct printk_list *next; - unsigned long long addr; - char *printk; -}; - -static int printk_cmp(const void *a, const void *b) -{ - const struct printk_map *pa = a; - const struct printk_map *pb = b; - - if (pa->addr < pb->addr) - return -1; - if (pa->addr > pb->addr) - return 1; - - return 0; -} - -static int printk_map_init(struct tep_handle *tep) -{ - struct printk_list *printklist; - struct printk_list *item; - struct printk_map *printk_map; - int i; - - printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1)); - if (!printk_map) - return -1; - - printklist = tep->printklist; - - i = 0; - while (printklist) { - printk_map[i].printk = printklist->printk; - printk_map[i].addr = printklist->addr; - i++; - item = printklist; - printklist = printklist->next; - free(item); - } - - qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp); - - tep->printk_map = printk_map; - tep->printklist = NULL; - - return 0; -} - -static struct printk_map * -find_printk(struct tep_handle *tep, unsigned long long addr) -{ - struct printk_map *printk; - struct printk_map key; - - if (!tep->printk_map && printk_map_init(tep)) - return NULL; - - key.addr = addr; - - printk = bsearch(&key, tep->printk_map, tep->printk_count, - sizeof(*tep->printk_map), printk_cmp); - - return printk; -} - -/** - * tep_register_print_string - register a string by its address - * @tep: a handle to the trace event parser context - * @fmt: the string format to register - * @addr: the address the string was located at - * - * This registers a string by the address it was stored in the kernel. - * The @fmt passed in is duplicated. - */ -int tep_register_print_string(struct tep_handle *tep, const char *fmt, - unsigned long long addr) -{ - struct printk_list *item = malloc(sizeof(*item)); - char *p; - - if (!item) - return -1; - - item->next = tep->printklist; - item->addr = addr; - - /* Strip off quotes and '\n' from the end */ - if (fmt[0] == '"') - fmt++; - item->printk = strdup(fmt); - if (!item->printk) - goto out_free; - - p = item->printk + strlen(item->printk) - 1; - if (*p == '"') - *p = 0; - - p -= 2; - if (strcmp(p, "\\n") == 0) - *p = 0; - - tep->printklist = item; - tep->printk_count++; - - return 0; - -out_free: - free(item); - errno = ENOMEM; - return -1; -} - -/** - * tep_print_printk - print out the stored strings - * @tep: a handle to the trace event parser context - * - * This prints the string formats that were stored. - */ -void tep_print_printk(struct tep_handle *tep) -{ - int i; - - if (!tep->printk_map) - printk_map_init(tep); - - for (i = 0; i < (int)tep->printk_count; i++) { - printf("%016llx %s\n", - tep->printk_map[i].addr, - tep->printk_map[i].printk); - } -} - -static struct tep_event *alloc_event(void) -{ - return calloc(1, sizeof(struct tep_event)); -} - -static int add_event(struct tep_handle *tep, struct tep_event *event) -{ - int i; - struct tep_event **events = realloc(tep->events, sizeof(event) * - (tep->nr_events + 1)); - if (!events) - return -1; - - tep->events = events; - - for (i = 0; i < tep->nr_events; i++) { - if (tep->events[i]->id > event->id) - break; - } - if (i < tep->nr_events) - memmove(&tep->events[i + 1], - &tep->events[i], - sizeof(event) * (tep->nr_events - i)); - - tep->events[i] = event; - tep->nr_events++; - - event->tep = tep; - - return 0; -} - -static int event_item_type(enum tep_event_type type) -{ - switch (type) { - case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE: - return 1; - case TEP_EVENT_ERROR ... TEP_EVENT_DELIM: - default: - return 0; - } -} - -static void free_flag_sym(struct tep_print_flag_sym *fsym) -{ - struct tep_print_flag_sym *next; - - while (fsym) { - next = fsym->next; - free(fsym->value); - free(fsym->str); - free(fsym); - fsym = next; - } -} - -static void free_arg(struct tep_print_arg *arg) -{ - struct tep_print_arg *farg; - - if (!arg) - return; - - switch (arg->type) { - case TEP_PRINT_ATOM: - free(arg->atom.atom); - break; - case TEP_PRINT_FIELD: - free(arg->field.name); - break; - case TEP_PRINT_FLAGS: - free_arg(arg->flags.field); - free(arg->flags.delim); - free_flag_sym(arg->flags.flags); - break; - case TEP_PRINT_SYMBOL: - free_arg(arg->symbol.field); - free_flag_sym(arg->symbol.symbols); - break; - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - free_arg(arg->hex.field); - free_arg(arg->hex.size); - break; - case TEP_PRINT_INT_ARRAY: - free_arg(arg->int_array.field); - free_arg(arg->int_array.count); - free_arg(arg->int_array.el_size); - break; - case TEP_PRINT_TYPE: - free(arg->typecast.type); - free_arg(arg->typecast.item); - break; - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - free(arg->string.string); - break; - case TEP_PRINT_BITMASK: - free(arg->bitmask.bitmask); - break; - case TEP_PRINT_DYNAMIC_ARRAY: - case TEP_PRINT_DYNAMIC_ARRAY_LEN: - free(arg->dynarray.index); - break; - case TEP_PRINT_OP: - free(arg->op.op); - free_arg(arg->op.left); - free_arg(arg->op.right); - break; - case TEP_PRINT_FUNC: - while (arg->func.args) { - farg = arg->func.args; - arg->func.args = farg->next; - free_arg(farg); - } - break; - - case TEP_PRINT_NULL: - default: - break; - } - - free(arg); -} - -static enum tep_event_type get_type(int ch) -{ - if (ch == '\n') - return TEP_EVENT_NEWLINE; - if (isspace(ch)) - return TEP_EVENT_SPACE; - if (isalnum(ch) || ch == '_') - return TEP_EVENT_ITEM; - if (ch == '\'') - return TEP_EVENT_SQUOTE; - if (ch == '"') - return TEP_EVENT_DQUOTE; - if (!isprint(ch)) - return TEP_EVENT_NONE; - if (ch == '(' || ch == ')' || ch == ',') - return TEP_EVENT_DELIM; - - return TEP_EVENT_OP; -} - -static int __read_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr++]; -} - -/** - * peek_char - peek at the next character that will be read - * - * Returns the next character read, or -1 if end of buffer. - */ -__hidden int peek_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr]; -} - -static int extend_token(char **tok, char *buf, int size) -{ - char *newtok = realloc(*tok, size); - - if (!newtok) { - free(*tok); - *tok = NULL; - return -1; - } - - if (!*tok) - strcpy(newtok, buf); - else - strcat(newtok, buf); - *tok = newtok; - - return 0; -} - -static enum tep_event_type force_token(const char *str, char **tok); - -static enum tep_event_type __read_token(char **tok) -{ - char buf[BUFSIZ]; - int ch, last_ch, quote_ch, next_ch; - int i = 0; - int tok_size = 0; - enum tep_event_type type; - - *tok = NULL; - - - ch = __read_char(); - if (ch < 0) - return TEP_EVENT_NONE; - - type = get_type(ch); - if (type == TEP_EVENT_NONE) - return type; - - buf[i++] = ch; - - switch (type) { - case TEP_EVENT_NEWLINE: - case TEP_EVENT_DELIM: - if (asprintf(tok, "%c", ch) < 0) - return TEP_EVENT_ERROR; - - return type; - - case TEP_EVENT_OP: - switch (ch) { - case '-': - next_ch = peek_char(); - if (next_ch == '>') { - buf[i++] = __read_char(); - break; - } - /* fall through */ - case '+': - case '|': - case '&': - case '>': - case '<': - last_ch = ch; - ch = peek_char(); - if (ch != last_ch) - goto test_equal; - buf[i++] = __read_char(); - switch (last_ch) { - case '>': - case '<': - goto test_equal; - default: - break; - } - break; - case '!': - case '=': - goto test_equal; - default: /* what should we do instead? */ - break; - } - buf[i] = 0; - *tok = strdup(buf); - return type; - - test_equal: - ch = peek_char(); - if (ch == '=') - buf[i++] = __read_char(); - goto out; - - case TEP_EVENT_DQUOTE: - case TEP_EVENT_SQUOTE: - /* don't keep quotes */ - i--; - quote_ch = ch; - last_ch = 0; - concat: - do { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - tok_size += BUFSIZ; - - if (extend_token(tok, buf, tok_size) < 0) - return TEP_EVENT_NONE; - i = 0; - } - last_ch = ch; - ch = __read_char(); - buf[i++] = ch; - /* the '\' '\' will cancel itself */ - if (ch == '\\' && last_ch == '\\') - last_ch = 0; - } while (ch != quote_ch || last_ch == '\\'); - /* remove the last quote */ - i--; - - /* - * For strings (double quotes) check the next token. - * If it is another string, concatinate the two. - */ - if (type == TEP_EVENT_DQUOTE) { - unsigned long long save_input_buf_ptr = input_buf_ptr; - - do { - ch = __read_char(); - } while (isspace(ch)); - if (ch == '"') - goto concat; - input_buf_ptr = save_input_buf_ptr; - } - - goto out; - - case TEP_EVENT_ERROR ... TEP_EVENT_SPACE: - case TEP_EVENT_ITEM: - default: - break; - } - - while (get_type(peek_char()) == type) { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - tok_size += BUFSIZ; - - if (extend_token(tok, buf, tok_size) < 0) - return TEP_EVENT_NONE; - i = 0; - } - ch = __read_char(); - buf[i++] = ch; - } - - out: - buf[i] = 0; - if (extend_token(tok, buf, tok_size + i + 1) < 0) - return TEP_EVENT_NONE; - - if (type == TEP_EVENT_ITEM) { - /* - * Older versions of the kernel has a bug that - * creates invalid symbols and will break the mac80211 - * parsing. This is a work around to that bug. - * - * See Linux kernel commit: - * 811cb50baf63461ce0bdb234927046131fc7fa8b - */ - if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\"%s\" ", tok); - } else if (strcmp(*tok, "STA_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\" sta:%pM\" ", tok); - } else if (strcmp(*tok, "VIF_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\" vif:%p(%d)\" ", tok); - } - } - - return type; -} - -static enum tep_event_type force_token(const char *str, char **tok) -{ - const char *save_input_buf; - unsigned long long save_input_buf_ptr; - unsigned long long save_input_buf_siz; - enum tep_event_type type; - - /* save off the current input pointers */ - save_input_buf = input_buf; - save_input_buf_ptr = input_buf_ptr; - save_input_buf_siz = input_buf_siz; - - init_input_buf(str, strlen(str)); - - type = __read_token(tok); - - /* reset back to original token */ - input_buf = save_input_buf; - input_buf_ptr = save_input_buf_ptr; - input_buf_siz = save_input_buf_siz; - - return type; -} - -/** - * free_token - free a token returned by tep_read_token - * @token: the token to free - */ -__hidden void free_token(char *tok) -{ - if (tok) - free(tok); -} - -/** - * read_token - access to utilities to use the tep parser - * @tok: The token to return - * - * This will parse tokens from the string given by - * tep_init_data(). - * - * Returns the token type. - */ -__hidden enum tep_event_type read_token(char **tok) -{ - enum tep_event_type type; - - for (;;) { - type = __read_token(tok); - if (type != TEP_EVENT_SPACE) - return type; - - free_token(*tok); - } - - /* not reached */ - *tok = NULL; - return TEP_EVENT_NONE; -} - -/* no newline */ -static enum tep_event_type read_token_item(char **tok) -{ - enum tep_event_type type; - - for (;;) { - type = __read_token(tok); - if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE) - return type; - free_token(*tok); - *tok = NULL; - } - - /* not reached */ - *tok = NULL; - return TEP_EVENT_NONE; -} - -static int test_type(enum tep_event_type type, enum tep_event_type expect) -{ - if (type != expect) { - do_warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - return 0; -} - -static int test_type_token(enum tep_event_type type, const char *token, - enum tep_event_type expect, const char *expect_tok) -{ - if (type != expect) { - do_warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - - if (strcmp(token, expect_tok) != 0) { - do_warning("Error: expected '%s' but read '%s'", - expect_tok, token); - return -1; - } - return 0; -} - -static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok) -{ - enum tep_event_type type; - - if (newline_ok) - type = read_token(tok); - else - type = read_token_item(tok); - return test_type(type, expect); -} - -static int read_expect_type(enum tep_event_type expect, char **tok) -{ - return __read_expect_type(expect, tok, 1); -} - -static int __read_expected(enum tep_event_type expect, const char *str, - int newline_ok) -{ - enum tep_event_type type; - char *token; - int ret; - - if (newline_ok) - type = read_token(&token); - else - type = read_token_item(&token); - - ret = test_type_token(type, token, expect, str); - - free_token(token); - - return ret; -} - -static int read_expected(enum tep_event_type expect, const char *str) -{ - return __read_expected(expect, str, 1); -} - -static int read_expected_item(enum tep_event_type expect, const char *str) -{ - return __read_expected(expect, str, 0); -} - -static char *event_read_name(void) -{ - char *token; - - if (read_expected(TEP_EVENT_ITEM, "name") < 0) - return NULL; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return NULL; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - return token; - - fail: - free_token(token); - return NULL; -} - -static int event_read_id(void) -{ - char *token; - int id; - - if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - id = strtoul(token, NULL, 0); - free_token(token); - return id; - - fail: - free_token(token); - return -1; -} - -static int field_is_string(struct tep_format_field *field) -{ - if ((field->flags & TEP_FIELD_IS_ARRAY) && - (strstr(field->type, "char") || strstr(field->type, "u8") || - strstr(field->type, "s8"))) - return 1; - - return 0; -} - -static int field_is_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__data_loc", 10) == 0) - return 1; - - return 0; -} - -static int field_is_relative_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__rel_loc", 9) == 0) - return 1; - - return 0; -} - -static int field_is_long(struct tep_format_field *field) -{ - /* includes long long */ - if (strstr(field->type, "long")) - return 1; - - return 0; -} - -static unsigned int type_size(const char *name) -{ - /* This covers all TEP_FIELD_IS_STRING types. */ - static struct { - const char *type; - unsigned int size; - } table[] = { - { "u8", 1 }, - { "u16", 2 }, - { "u32", 4 }, - { "u64", 8 }, - { "s8", 1 }, - { "s16", 2 }, - { "s32", 4 }, - { "s64", 8 }, - { "char", 1 }, - { }, - }; - int i; - - for (i = 0; table[i].type; i++) { - if (!strcmp(table[i].type, name)) - return table[i].size; - } - - return 0; -} - -static int append(char **buf, const char *delim, const char *str) -{ - char *new_buf; - - new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1); - if (!new_buf) - return -1; - strcat(new_buf, delim); - strcat(new_buf, str); - *buf = new_buf; - return 0; -} - -static int event_read_fields(struct tep_event *event, struct tep_format_field **fields) -{ - struct tep_format_field *field = NULL; - enum tep_event_type type; - char *token; - char *last_token; - char *delim = " "; - int count = 0; - int ret; - - do { - unsigned int size_dynamic = 0; - - type = read_token(&token); - if (type == TEP_EVENT_NEWLINE) { - free_token(token); - return count; - } - - count++; - - if (test_type_token(type, token, TEP_EVENT_ITEM, "field")) - goto fail; - free_token(token); - - type = read_token(&token); - /* - * The ftrace fields may still use the "special" name. - * Just ignore it. - */ - if (event->flags & TEP_EVENT_FL_ISFTRACE && - type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) { - free_token(token); - type = read_token(&token); - } - - if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0) - goto fail; - - free_token(token); - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - last_token = token; - - field = calloc(1, sizeof(*field)); - if (!field) - goto fail; - - field->event = event; - - /* read the rest of the type */ - for (;;) { - type = read_token(&token); - if (type == TEP_EVENT_ITEM || - (type == TEP_EVENT_OP && strcmp(token, "*") == 0) || - /* - * Some of the ftrace fields are broken and have - * an illegal "." in them. - */ - (event->flags & TEP_EVENT_FL_ISFTRACE && - type == TEP_EVENT_OP && strcmp(token, ".") == 0)) { - - if (strcmp(token, "*") == 0) - field->flags |= TEP_FIELD_IS_POINTER; - - if (field->type) { - ret = append(&field->type, delim, last_token); - free(last_token); - if (ret < 0) - goto fail; - } else - field->type = last_token; - last_token = token; - delim = " "; - continue; - } - - /* Handle __attribute__((user)) */ - if ((type == TEP_EVENT_DELIM) && - strcmp("__attribute__", last_token) == 0 && - token[0] == '(') { - int depth = 1; - int ret; - - ret = append(&field->type, " ", last_token); - ret |= append(&field->type, "", "("); - if (ret < 0) - goto fail; - - delim = " "; - while ((type = read_token(&token)) != TEP_EVENT_NONE) { - if (type == TEP_EVENT_DELIM) { - if (token[0] == '(') - depth++; - else if (token[0] == ')') - depth--; - if (!depth) - break; - ret = append(&field->type, "", token); - delim = ""; - } else { - ret = append(&field->type, delim, token); - delim = " "; - } - if (ret < 0) - goto fail; - free(last_token); - last_token = token; - } - continue; - } - break; - } - - if (!field->type) { - do_warning_event(event, "%s: no type found", __func__); - goto fail; - } - field->name = field->alias = last_token; - - if (test_type(type, TEP_EVENT_OP)) - goto fail; - - if (strcmp(token, "[") == 0) { - enum tep_event_type last_type = type; - char *brackets = token; - - field->flags |= TEP_FIELD_IS_ARRAY; - - type = read_token(&token); - - if (type == TEP_EVENT_ITEM) - field->arraylen = strtoul(token, NULL, 0); - else - field->arraylen = 0; - - while (strcmp(token, "]") != 0) { - const char *delim; - - if (last_type == TEP_EVENT_ITEM && - type == TEP_EVENT_ITEM) - delim = " "; - else - delim = ""; - - last_type = type; - - ret = append(&brackets, delim, token); - if (ret < 0) { - free(brackets); - goto fail; - } - /* We only care about the last token */ - field->arraylen = strtoul(token, NULL, 0); - free_token(token); - type = read_token(&token); - if (type == TEP_EVENT_NONE) { - free(brackets); - do_warning_event(event, "failed to find token"); - goto fail; - } - } - - free_token(token); - - ret = append(&brackets, "", "]"); - if (ret < 0) { - free(brackets); - goto fail; - } - - /* add brackets to type */ - - type = read_token(&token); - /* - * If the next token is not an OP, then it is of - * the format: type [] item; - */ - if (type == TEP_EVENT_ITEM) { - ret = append(&field->type, " ", field->name); - if (ret < 0) { - free(brackets); - goto fail; - } - ret = append(&field->type, "", brackets); - - size_dynamic = type_size(field->name); - free_token(field->name); - field->name = field->alias = token; - type = read_token(&token); - } else { - ret = append(&field->type, "", brackets); - if (ret < 0) { - free(brackets); - goto fail; - } - } - free(brackets); - } - - if (field_is_string(field)) - field->flags |= TEP_FIELD_IS_STRING; - if (field_is_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC; - if (field_is_relative_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; - if (field_is_long(field)) - field->flags |= TEP_FIELD_IS_LONG; - - if (test_type_token(type, token, TEP_EVENT_OP, ";")) - goto fail; - free_token(token); - - if (read_expected(TEP_EVENT_ITEM, "offset") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - field->offset = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_ITEM, "size") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - field->size = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - type = read_token(&token); - if (type != TEP_EVENT_NEWLINE) { - /* newer versions of the kernel have a "signed" type */ - if (test_type_token(type, token, TEP_EVENT_ITEM, "signed")) - goto fail; - - free_token(token); - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - - if (strtoul(token, NULL, 0)) - field->flags |= TEP_FIELD_IS_SIGNED; - - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - } - - free_token(token); - - if (field->flags & TEP_FIELD_IS_ARRAY) { - if (field->arraylen) - field->elementsize = field->size / field->arraylen; - else if (field->flags & TEP_FIELD_IS_DYNAMIC) - field->elementsize = size_dynamic; - else if (field->flags & TEP_FIELD_IS_STRING) - field->elementsize = 1; - else if (field->flags & TEP_FIELD_IS_LONG) - field->elementsize = event->tep ? - event->tep->long_size : - sizeof(long); - } else - field->elementsize = field->size; - - *fields = field; - fields = &field->next; - - } while (1); - - return 0; - -fail: - free_token(token); -fail_expect: - if (field) { - free(field->type); - free(field->name); - free(field); - } - return -1; -} - -static int event_read_format(struct tep_event *event) -{ - char *token; - int ret; - - if (read_expected_item(TEP_EVENT_ITEM, "format") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - free_token(token); - - ret = event_read_fields(event, &event->format.common_fields); - if (ret < 0) - return ret; - event->format.nr_common = ret; - - ret = event_read_fields(event, &event->format.fields); - if (ret < 0) - return ret; - event->format.nr_fields = ret; - - return 0; - - fail: - free_token(token); - return -1; -} - -static enum tep_event_type -process_arg_token(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_event_type type); - -static enum tep_event_type -process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - char *token; - - type = read_token(&token); - *tok = token; - - return process_arg_token(event, arg, tok, type); -} - -static enum tep_event_type -process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok); - -/* - * For __print_symbolic() and __print_flags, we need to completely - * evaluate the first argument, which defines what to print next. - */ -static enum tep_event_type -process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - - type = process_arg(event, arg, tok); - - while (type == TEP_EVENT_OP) { - type = process_op(event, arg, tok); - } - - return type; -} - -static enum tep_event_type -process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok) -{ - struct tep_print_arg *arg, *left, *right; - enum tep_event_type type; - char *token = NULL; - - arg = alloc_arg(); - left = alloc_arg(); - right = alloc_arg(); - - if (!arg || !left || !right) { - do_warning_event(event, "%s: not enough memory!", __func__); - /* arg will be freed at out_free */ - free_arg(left); - free_arg(right); - goto out_free; - } - - arg->type = TEP_PRINT_OP; - arg->op.left = left; - arg->op.right = right; - - *tok = NULL; - type = process_arg(event, left, &token); - - again: - if (type == TEP_EVENT_ERROR) - goto out_free; - - /* Handle other operations in the arguments */ - if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) { - type = process_op(event, left, &token); - goto again; - } - - if (test_type_token(type, token, TEP_EVENT_OP, ":")) - goto out_free; - - arg->op.op = token; - - type = process_arg(event, right, &token); - - top->op.right = arg; - - *tok = token; - return type; - -out_free: - /* Top may point to itself */ - top->op.right = NULL; - free_token(token); - free_arg(arg); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_array(struct tep_event *event, struct tep_print_arg *top, char **tok) -{ - struct tep_print_arg *arg; - enum tep_event_type type; - char *token = NULL; - - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", __func__); - /* '*tok' is set to top->op.op. No need to free. */ - *tok = NULL; - return TEP_EVENT_ERROR; - } - - *tok = NULL; - type = process_arg(event, arg, &token); - if (test_type_token(type, token, TEP_EVENT_OP, "]")) - goto out_free; - - top->op.right = arg; - - free_token(token); - type = read_token_item(&token); - *tok = token; - - return type; - -out_free: - free_token(token); - free_arg(arg); - return TEP_EVENT_ERROR; -} - -static int get_op_prio(char *op) -{ - if (!op[1]) { - switch (op[0]) { - case '~': - case '!': - return 4; - case '*': - case '/': - case '%': - return 6; - case '+': - case '-': - return 7; - /* '>>' and '<<' are 8 */ - case '<': - case '>': - return 9; - /* '==' and '!=' are 10 */ - case '&': - return 11; - case '^': - return 12; - case '|': - return 13; - case '?': - return 16; - default: - do_warning("unknown op '%c'", op[0]); - return -1; - } - } else { - if (strcmp(op, "++") == 0 || - strcmp(op, "--") == 0) { - return 3; - } else if (strcmp(op, ">>") == 0 || - strcmp(op, "<<") == 0) { - return 8; - } else if (strcmp(op, ">=") == 0 || - strcmp(op, "<=") == 0) { - return 9; - } else if (strcmp(op, "==") == 0 || - strcmp(op, "!=") == 0) { - return 10; - } else if (strcmp(op, "&&") == 0) { - return 14; - } else if (strcmp(op, "||") == 0) { - return 15; - } else { - do_warning("unknown op '%s'", op); - return -1; - } - } -} - -static int set_op_prio(struct tep_print_arg *arg) -{ - - /* single ops are the greatest */ - if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL) - arg->op.prio = 0; - else - arg->op.prio = get_op_prio(arg->op.op); - - return arg->op.prio; -} - -/* Note, *tok does not get freed, but will most likely be saved */ -static enum tep_event_type -process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *left, *right = NULL; - enum tep_event_type type; - char *token; - - /* the op is passed in via tok */ - token = *tok; - - if (arg->type == TEP_PRINT_OP && !arg->op.left) { - /* handle single op */ - if (token[1]) { - do_warning_event(event, "bad op token %s", token); - goto out_free; - } - switch (token[0]) { - case '~': - case '!': - case '+': - case '-': - break; - default: - do_warning_event(event, "bad op token %s", token); - goto out_free; - - } - - /* make an empty left */ - left = alloc_arg(); - if (!left) - goto out_warn_free; - - left->type = TEP_PRINT_NULL; - arg->op.left = left; - - right = alloc_arg(); - if (!right) - goto out_warn_free; - - arg->op.right = right; - - /* do not free the token, it belongs to an op */ - *tok = NULL; - type = process_arg(event, right, tok); - - } else if (strcmp(token, "?") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.prio = 0; - - /* it will set arg->op.right */ - type = process_cond(event, arg, tok); - - } else if (strcmp(token, ">>") == 0 || - strcmp(token, "<<") == 0 || - strcmp(token, "&") == 0 || - strcmp(token, "|") == 0 || - strcmp(token, "&&") == 0 || - strcmp(token, "||") == 0 || - strcmp(token, "-") == 0 || - strcmp(token, "+") == 0 || - strcmp(token, "*") == 0 || - strcmp(token, "^") == 0 || - strcmp(token, "/") == 0 || - strcmp(token, "%") == 0 || - strcmp(token, "<") == 0 || - strcmp(token, ">") == 0 || - strcmp(token, "<=") == 0 || - strcmp(token, ">=") == 0 || - strcmp(token, "==") == 0 || - strcmp(token, "!=") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.right = NULL; - - if (set_op_prio(arg) == -1) { - event->flags |= TEP_EVENT_FL_FAILED; - /* arg->op.op (= token) will be freed at out_free */ - arg->op.op = NULL; - goto out_free; - } - - type = read_token_item(&token); - *tok = token; - - /* could just be a type pointer */ - if ((strcmp(arg->op.op, "*") == 0) && - type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) { - int ret; - - if (left->type != TEP_PRINT_ATOM) { - do_warning_event(event, "bad pointer type"); - goto out_free; - } - ret = append(&left->atom.atom, " ", "*"); - if (ret < 0) - goto out_warn_free; - - free(arg->op.op); - *arg = *left; - free(left); - - return type; - } - - right = alloc_arg(); - if (!right) - goto out_warn_free; - - type = process_arg_token(event, right, tok, type); - if (type == TEP_EVENT_ERROR) { - free_arg(right); - /* token was freed in process_arg_token() via *tok */ - token = NULL; - goto out_free; - } - - if (right->type == TEP_PRINT_OP && - get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { - struct tep_print_arg tmp; - - /* rotate ops according to the priority */ - arg->op.right = right->op.left; - - tmp = *arg; - *arg = *right; - *right = tmp; - - arg->op.left = right; - } else { - arg->op.right = right; - } - - } else if (strcmp(token, "[") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - - arg->op.prio = 0; - - /* it will set arg->op.right */ - type = process_array(event, arg, tok); - - } else { - do_warning_event(event, "unknown op '%s'", token); - event->flags |= TEP_EVENT_FL_FAILED; - /* the arg is now the left side */ - goto out_free; - } - - if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) { - int prio; - - /* higher prios need to be closer to the root */ - prio = get_op_prio(*tok); - - if (prio > arg->op.prio) - return process_op(event, arg, tok); - - return process_op(event, right, tok); - } - - return type; - -out_warn_free: - do_warning_event(event, "%s: not enough memory!", __func__); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *field; - char *token; - - if (read_expected(TEP_EVENT_OP, "->") < 0) - goto out_err; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - field = token; - - arg->type = TEP_PRINT_FIELD; - arg->field.name = field; - - if (is_flag_field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - arg->field.field->flags |= TEP_FIELD_IS_FLAG; - is_flag_field = 0; - } else if (is_symbolic_field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC; - is_symbolic_field = 0; - } - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static int alloc_and_process_delim(struct tep_event *event, char *next_token, - struct tep_print_arg **print_arg) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token; - int ret = 0; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - errno = ENOMEM; - return -1; - } - - type = process_arg(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) { - errno = EINVAL; - ret = -1; - free_arg(field); - goto out_free_token; - } - - *print_arg = field; - -out_free_token: - free_token(token); - - return ret; -} - -static char *arg_eval (struct tep_print_arg *arg); - -static unsigned long long -eval_type_str(unsigned long long val, const char *type, int pointer) -{ - int sign = 0; - char *ref; - int len; - - len = strlen(type); - - if (pointer) { - - if (type[len-1] != '*') { - do_warning("pointer expected with non pointer type"); - return val; - } - - ref = malloc(len); - if (!ref) { - do_warning("%s: not enough memory!", __func__); - return val; - } - memcpy(ref, type, len); - - /* chop off the " *" */ - ref[len - 2] = 0; - - val = eval_type_str(val, ref, 0); - free(ref); - return val; - } - - /* check if this is a pointer */ - if (type[len - 1] == '*') - return val; - - /* Try to figure out the arg size*/ - if (strncmp(type, "struct", 6) == 0) - /* all bets off */ - return val; - - if (strcmp(type, "u8") == 0) - return val & 0xff; - - if (strcmp(type, "u16") == 0) - return val & 0xffff; - - if (strcmp(type, "u32") == 0) - return val & 0xffffffff; - - if (strcmp(type, "u64") == 0 || - strcmp(type, "s64") == 0) - return val; - - if (strcmp(type, "s8") == 0) - return (unsigned long long)(char)val & 0xff; - - if (strcmp(type, "s16") == 0) - return (unsigned long long)(short)val & 0xffff; - - if (strcmp(type, "s32") == 0) - return (unsigned long long)(int)val & 0xffffffff; - - if (strncmp(type, "unsigned ", 9) == 0) { - sign = 0; - type += 9; - } - - if (strcmp(type, "char") == 0) { - if (sign) - return (unsigned long long)(char)val & 0xff; - else - return val & 0xff; - } - - if (strcmp(type, "short") == 0) { - if (sign) - return (unsigned long long)(short)val & 0xffff; - else - return val & 0xffff; - } - - if (strcmp(type, "int") == 0) { - if (sign) - return (unsigned long long)(int)val & 0xffffffff; - else - return val & 0xffffffff; - } - - return val; -} - -/* - * Try to figure out the type. - */ -static unsigned long long -eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer) -{ - if (arg->type != TEP_PRINT_TYPE) { - do_warning("expected type argument"); - return 0; - } - - return eval_type_str(val, arg->typecast.type, pointer); -} - -static int arg_num_eval(struct tep_print_arg *arg, long long *val) -{ - long long left, right; - int ret = 1; - - switch (arg->type) { - case TEP_PRINT_ATOM: - *val = strtoll(arg->atom.atom, NULL, 0); - break; - case TEP_PRINT_TYPE: - ret = arg_num_eval(arg->typecast.item, val); - if (!ret) - break; - *val = eval_type(*val, arg, 0); - break; - case TEP_PRINT_OP: - switch (arg->op.op[0]) { - case '|': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - if (arg->op.op[1]) - *val = left || right; - else - *val = left | right; - break; - case '&': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - if (arg->op.op[1]) - *val = left && right; - else - *val = left & right; - break; - case '<': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - switch (arg->op.op[1]) { - case 0: - *val = left < right; - break; - case '<': - *val = left << right; - break; - case '=': - *val = left <= right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '>': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - switch (arg->op.op[1]) { - case 0: - *val = left > right; - break; - case '>': - *val = left >> right; - break; - case '=': - *val = left >= right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '=': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - - if (arg->op.op[1] != '=') { - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } else - *val = left == right; - break; - case '!': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - - switch (arg->op.op[1]) { - case '=': - *val = left != right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '-': - /* check for negative */ - if (arg->op.left->type == TEP_PRINT_NULL) - left = 0; - else - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = left - right; - break; - case '+': - if (arg->op.left->type == TEP_PRINT_NULL) - left = 0; - else - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = left + right; - break; - case '~': - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = ~right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - - case TEP_PRINT_NULL: - case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL: - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - default: - do_warning("invalid eval type %d", arg->type); - ret = 0; - - } - return ret; -} - -static char *arg_eval (struct tep_print_arg *arg) -{ - long long val; - static char buf[24]; - - switch (arg->type) { - case TEP_PRINT_ATOM: - return arg->atom.atom; - case TEP_PRINT_TYPE: - return arg_eval(arg->typecast.item); - case TEP_PRINT_OP: - if (!arg_num_eval(arg, &val)) - break; - sprintf(buf, "%lld", val); - return buf; - - case TEP_PRINT_NULL: - case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL: - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - default: - do_warning("invalid eval type %d", arg->type); - break; - } - - return NULL; -} - -static enum tep_event_type -process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok) -{ - enum tep_event_type type; - struct tep_print_arg *arg = NULL; - struct tep_print_flag_sym *field; - char *token = *tok; - char *value; - - do { - free_token(token); - type = read_token_item(&token); - if (test_type_token(type, token, TEP_EVENT_OP, "{")) - break; - - arg = alloc_arg(); - if (!arg) - goto out_free; - - free_token(token); - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_OP) - type = process_op(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free; - - field = calloc(1, sizeof(*field)); - if (!field) - goto out_free; - - value = arg_eval(arg); - if (value == NULL) - goto out_free_field; - field->value = strdup(value); - if (field->value == NULL) - goto out_free_field; - - free_arg(arg); - arg = alloc_arg(); - if (!arg) - goto out_free; - - free_token(token); - type = process_arg(event, arg, &token); - if (test_type_token(type, token, TEP_EVENT_OP, "}")) - goto out_free_field; - - value = arg_eval(arg); - if (value == NULL) - goto out_free_field; - field->str = strdup(value); - if (field->str == NULL) - goto out_free_field; - free_arg(arg); - arg = NULL; - - *list = field; - list = &field->next; - - free_token(token); - type = read_token_item(&token); - } while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0); - - *tok = token; - return type; - -out_free_field: - free_flag_sym(field); -out_free: - free_arg(arg); - free_token(token); - *tok = NULL; - - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token = NULL; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_FLAGS; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } - - type = process_field_arg(event, field, &token); - - /* Handle operations in the first argument */ - while (type == TEP_EVENT_OP) - type = process_op(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free_field; - free_token(token); - - arg->flags.field = field; - - type = read_token_item(&token); - if (event_item_type(type)) { - arg->flags.delim = token; - type = read_token_item(&token); - } - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free; - - type = process_fields(event, &arg->flags.flags, &token); - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free_field: - free_arg(field); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token = NULL; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_SYMBOL; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } - - type = process_field_arg(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free_field; - - arg->symbol.field = field; - - type = process_fields(event, &arg->symbol.symbols, &token); - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free_field: - free_arg(field); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_hex_common(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_print_arg_type type) -{ - memset(arg, 0, sizeof(*arg)); - arg->type = type; - - if (alloc_and_process_delim(event, ",", &arg->hex.field)) - goto out; - - if (alloc_and_process_delim(event, ")", &arg->hex.size)) - goto free_field; - - return read_token_item(tok); - -free_field: - free_arg(arg->hex.field); - arg->hex.field = NULL; -out: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - return process_hex_common(event, arg, tok, TEP_PRINT_HEX); -} - -static enum tep_event_type -process_hex_str(struct tep_event *event, struct tep_print_arg *arg, - char **tok) -{ - return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR); -} - -static enum tep_event_type -process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_INT_ARRAY; - - if (alloc_and_process_delim(event, ",", &arg->int_array.field)) - goto out; - - if (alloc_and_process_delim(event, ",", &arg->int_array.count)) - goto free_field; - - if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) - goto free_size; - - return read_token_item(tok); - -free_size: - free_arg(arg->int_array.count); - arg->int_array.count = NULL; -free_field: - free_arg(arg->int_array.field); - arg->int_array.field = NULL; -out: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_format_field *field; - enum tep_event_type type; - char *token; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_DYNAMIC_ARRAY; - - /* - * The item within the parenthesis is another field that holds - * the index into where the array starts. - */ - type = read_token(&token); - *tok = token; - if (type != TEP_EVENT_ITEM) - goto out_free; - - /* Find the field */ - - field = tep_find_field(event, token); - if (!field) - goto out_free; - - arg->dynarray.field = field; - arg->dynarray.index = 0; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_free; - - free_token(token); - type = read_token_item(&token); - *tok = token; - if (type != TEP_EVENT_OP || strcmp(token, "[") != 0) - return type; - - free_token(token); - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", __func__); - *tok = NULL; - return TEP_EVENT_ERROR; - } - - type = process_arg(event, arg, &token); - if (type == TEP_EVENT_ERROR) - goto out_free_arg; - - if (!test_type_token(type, token, TEP_EVENT_OP, "]")) - goto out_free_arg; - - free_token(token); - type = read_token_item(tok); - return type; - - out_free_arg: - free_arg(arg); - out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg, - char **tok) -{ - struct tep_format_field *field; - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN; - - /* Find the field */ - field = tep_find_field(event, token); - if (!field) - goto out_free; - - arg->dynarray.field = field; - arg->dynarray.index = 0; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - free_token(token); - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *item_arg; - enum tep_event_type type; - char *token; - - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (type == TEP_EVENT_OP) - type = process_op(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(&token); - - /* - * If the next token is an item or another open paren, then - * this was a typecast. - */ - if (event_item_type(type) || - (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) { - - /* make this a typecast and contine */ - - /* prevous must be an atom */ - if (arg->type != TEP_PRINT_ATOM) { - do_warning_event(event, "previous needed to be TEP_PRINT_ATOM"); - goto out_free; - } - - item_arg = alloc_arg(); - if (!item_arg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - goto out_free; - } - - arg->type = TEP_PRINT_TYPE; - arg->typecast.type = arg->atom.atom; - arg->typecast.item = item_arg; - type = process_arg_token(event, item_arg, &token, type); - - } - - *tok = token; - return type; - - out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - - -static enum tep_event_type -process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_STRING; - arg->string.string = token; - arg->string.field = NULL; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_BITMASK; - arg->bitmask.bitmask = token; - arg->bitmask.field = NULL; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static struct tep_function_handler * -find_func_handler(struct tep_handle *tep, char *func_name) -{ - struct tep_function_handler *func; - - if (!tep) - return NULL; - - for (func = tep->func_handlers; func; func = func->next) { - if (strcmp(func->name, func_name) == 0) - break; - } - - return func; -} - -static void remove_func_handler(struct tep_handle *tep, char *func_name) -{ - struct tep_function_handler *func; - struct tep_function_handler **next; - - next = &tep->func_handlers; - while ((func = *next)) { - if (strcmp(func->name, func_name) == 0) { - *next = func->next; - free_func_handle(func); - break; - } - next = &func->next; - } -} - -static enum tep_event_type -process_func_handler(struct tep_event *event, struct tep_function_handler *func, - struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg **next_arg; - struct tep_print_arg *farg; - enum tep_event_type type; - char *token; - int i; - - arg->type = TEP_PRINT_FUNC; - arg->func.func = func; - - *tok = NULL; - - next_arg = &(arg->func.args); - for (i = 0; i < func->nr_args; i++) { - farg = alloc_arg(); - if (!farg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return TEP_EVENT_ERROR; - } - - type = process_arg(event, farg, &token); - if (i < (func->nr_args - 1)) { - if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) { - do_warning_event(event, - "Error: function '%s()' expects %d arguments but event %s only uses %d", - func->name, func->nr_args, - event->name, i + 1); - goto err; - } - } else { - if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) { - do_warning_event(event, - "Error: function '%s()' only expects %d arguments but event %s has more", - func->name, func->nr_args, event->name); - goto err; - } - } - - *next_arg = farg; - next_arg = &(farg->next); - free_token(token); - } - - type = read_token(&token); - *tok = token; - - return type; - -err: - free_arg(farg); - free_token(token); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - char *token = NULL; - - /* Handle __builtin_expect( cond, #) */ - type = process_arg(event, arg, &token); - - if (type != TEP_EVENT_DELIM || token[0] != ',') - goto out_free; - - free_token(token); - - /* We don't care what the second parameter is of the __builtin_expect() */ - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_function(struct tep_event *event, struct tep_print_arg *arg, - char *token, char **tok) -{ - struct tep_function_handler *func; - - if (strcmp(token, "__print_flags") == 0) { - free_token(token); - is_flag_field = 1; - return process_flags(event, arg, tok); - } - if (strcmp(token, "__print_symbolic") == 0) { - free_token(token); - is_symbolic_field = 1; - return process_symbols(event, arg, tok); - } - if (strcmp(token, "__print_hex") == 0) { - free_token(token); - return process_hex(event, arg, tok); - } - if (strcmp(token, "__print_hex_str") == 0) { - free_token(token); - return process_hex_str(event, arg, tok); - } - if (strcmp(token, "__print_array") == 0) { - free_token(token); - return process_int_array(event, arg, tok); - } - if (strcmp(token, "__get_str") == 0 || - strcmp(token, "__get_rel_str") == 0) { - free_token(token); - return process_str(event, arg, tok); - } - if (strcmp(token, "__get_bitmask") == 0 || - strcmp(token, "__get_rel_bitmask") == 0) { - free_token(token); - return process_bitmask(event, arg, tok); - } - if (strcmp(token, "__get_dynamic_array") == 0 || - strcmp(token, "__get_rel_dynamic_array") == 0) { - free_token(token); - return process_dynamic_array(event, arg, tok); - } - if (strcmp(token, "__get_dynamic_array_len") == 0 || - strcmp(token, "__get_rel_dynamic_array_len") == 0) { - free_token(token); - return process_dynamic_array_len(event, arg, tok); - } - if (strcmp(token, "__builtin_expect") == 0) { - free_token(token); - return process_builtin_expect(event, arg, tok); - } - - func = find_func_handler(event->tep, token); - if (func) { - free_token(token); - return process_func_handler(event, func, arg, tok); - } - - do_warning_event(event, "function %s not defined", token); - free_token(token); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_arg_token(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_event_type type) -{ - char *token; - char *atom; - - token = *tok; - - switch (type) { - case TEP_EVENT_ITEM: - if (strcmp(token, "REC") == 0) { - free_token(token); - type = process_entry(event, arg, &token); - break; - } - atom = token; - /* test the next token */ - type = read_token_item(&token); - - /* - * If the next token is a parenthesis, then this - * is a function. - */ - if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) { - free_token(token); - token = NULL; - /* this will free atom. */ - type = process_function(event, arg, atom, &token); - break; - } - /* atoms can be more than one token long */ - while (type == TEP_EVENT_ITEM) { - int ret; - - ret = append(&atom, " ", token); - if (ret < 0) { - free(atom); - *tok = NULL; - free_token(token); - return TEP_EVENT_ERROR; - } - free_token(token); - type = read_token_item(&token); - } - - arg->type = TEP_PRINT_ATOM; - arg->atom.atom = atom; - break; - - case TEP_EVENT_DQUOTE: - case TEP_EVENT_SQUOTE: - arg->type = TEP_PRINT_ATOM; - arg->atom.atom = token; - type = read_token_item(&token); - break; - case TEP_EVENT_DELIM: - if (strcmp(token, "(") == 0) { - free_token(token); - type = process_paren(event, arg, &token); - break; - } - case TEP_EVENT_OP: - /* handle single ops */ - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = NULL; - type = process_op(event, arg, &token); - - /* On error, the op is freed */ - if (type == TEP_EVENT_ERROR) - arg->op.op = NULL; - - /* return error type if errored */ - break; - - case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE: - default: - do_warning_event(event, "unexpected type %d", type); - return TEP_EVENT_ERROR; - } - *tok = token; - - return type; -} - -static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list) -{ - enum tep_event_type type = TEP_EVENT_ERROR; - struct tep_print_arg *arg; - char *token; - int args = 0; - - do { - if (type == TEP_EVENT_NEWLINE) { - type = read_token_item(&token); - continue; - } - - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return -1; - } - - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_ERROR) { - free_token(token); - free_arg(arg); - return -1; - } - - *list = arg; - args++; - - if (type == TEP_EVENT_OP) { - type = process_op(event, arg, &token); - free_token(token); - if (type == TEP_EVENT_ERROR) { - *list = NULL; - free_arg(arg); - return -1; - } - list = &arg->next; - continue; - } - - if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) { - free_token(token); - *list = arg; - list = &arg->next; - continue; - } - break; - } while (type != TEP_EVENT_NONE); - - if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR) - free_token(token); - - return args; -} - -static int event_read_print(struct tep_event *event) -{ - enum tep_event_type type; - char *token; - int ret; - - if (read_expected_item(TEP_EVENT_ITEM, "print") < 0) - return -1; - - if (read_expected(TEP_EVENT_ITEM, "fmt") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0) - goto fail; - - concat: - event->print_fmt.format = token; - event->print_fmt.args = NULL; - - /* ok to have no arg */ - type = read_token_item(&token); - - if (type == TEP_EVENT_NONE) - return 0; - - /* Handle concatenation of print lines */ - if (type == TEP_EVENT_DQUOTE) { - char *cat; - - if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0) - goto fail; - free_token(token); - free_token(event->print_fmt.format); - event->print_fmt.format = NULL; - token = cat; - goto concat; - } - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto fail; - - free_token(token); - - ret = event_read_print_args(event, &event->print_fmt.args); - if (ret < 0) - return -1; - - return ret; - - fail: - free_token(token); - return -1; -} - -/** - * tep_find_common_field - return a common field by event - * @event: handle for the event - * @name: the name of the common field to return - * - * Returns a common field from the event by the given @name. - * This only searches the common fields and not all field. - */ -struct tep_format_field * -tep_find_common_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - for (format = event->format.common_fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -/** - * tep_find_field - find a non-common field - * @event: handle for the event - * @name: the name of the non-common field - * - * Returns a non-common field by the given @name. - * This does not search common fields. - */ -struct tep_format_field * -tep_find_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - for (format = event->format.fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -/** - * tep_find_any_field - find any field by name - * @event: handle for the event - * @name: the name of the field - * - * Returns a field by the given @name. - * This searches the common field names first, then - * the non-common ones if a common one was not found. - */ -struct tep_format_field * -tep_find_any_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - format = tep_find_common_field(event, name); - if (format) - return format; - return tep_find_field(event, name); -} - -/** - * tep_read_number - read a number from data - * @tep: a handle to the trace event parser context - * @ptr: the raw data - * @size: the size of the data that holds the number - * - * Returns the number (converted to host) from the - * raw data. - */ -unsigned long long tep_read_number(struct tep_handle *tep, - const void *ptr, int size) -{ - unsigned long long val; - - switch (size) { - case 1: - return *(unsigned char *)ptr; - case 2: - return data2host2(tep, *(unsigned short *)ptr); - case 4: - return data2host4(tep, *(unsigned int *)ptr); - case 8: - memcpy(&val, (ptr), sizeof(unsigned long long)); - return data2host8(tep, val); - default: - /* BUG! */ - return 0; - } -} - -/** - * tep_read_number_field - read a number from data - * @field: a handle to the field - * @data: the raw data to read - * @value: the value to place the number in - * - * Reads raw data according to a field offset and size, - * and translates it into @value. - * - * Returns 0 on success, -1 otherwise. - */ -int tep_read_number_field(struct tep_format_field *field, const void *data, - unsigned long long *value) -{ - if (!field) - return -1; - switch (field->size) { - case 1: - case 2: - case 4: - case 8: - *value = tep_read_number(field->event->tep, - data + field->offset, field->size); - return 0; - default: - return -1; - } -} - -static int get_common_info(struct tep_handle *tep, - const char *type, int *offset, int *size) -{ - struct tep_event *event; - struct tep_format_field *field; - - /* - * All events should have the same common elements. - * Pick any event to find where the type is; - */ - if (!tep->events) { - do_warning("no event_list!"); - return -1; - } - - event = tep->events[0]; - field = tep_find_common_field(event, type); - if (!field) - return -1; - - *offset = field->offset; - *size = field->size; - - return 0; -} - -static int __parse_common(struct tep_handle *tep, void *data, - int *size, int *offset, const char *name) -{ - int ret; - - if (!*size) { - ret = get_common_info(tep, name, offset, size); - if (ret < 0) - return ret; - } - return tep_read_number(tep, data + *offset, *size); -} - -static int trace_parse_common_type(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->type_size, &tep->type_offset, - "common_type"); -} - -static int parse_common_pid(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->pid_size, &tep->pid_offset, - "common_pid"); -} - -static int parse_common_pc(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->pc_size, &tep->pc_offset, - "common_preempt_count"); -} - -static int parse_common_flags(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->flags_size, &tep->flags_offset, - "common_flags"); -} - -static int parse_common_lock_depth(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->ld_size, &tep->ld_offset, - "common_lock_depth"); -} - -static int parse_common_migrate_disable(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->ld_size, &tep->ld_offset, - "common_migrate_disable"); -} - -static int events_id_cmp(const void *a, const void *b); - -/** - * tep_find_event - find an event by given id - * @tep: a handle to the trace event parser context - * @id: the id of the event - * - * Returns an event that has a given @id. - */ -struct tep_event *tep_find_event(struct tep_handle *tep, int id) -{ - struct tep_event **eventptr; - struct tep_event key; - struct tep_event *pkey = &key; - - /* Check cache first */ - if (tep->last_event && tep->last_event->id == id) - return tep->last_event; - - key.id = id; - - eventptr = bsearch(&pkey, tep->events, tep->nr_events, - sizeof(*tep->events), events_id_cmp); - - if (eventptr) { - tep->last_event = *eventptr; - return *eventptr; - } - - return NULL; -} - -/** - * tep_find_event_by_name - find an event by given name - * @tep: a handle to the trace event parser context - * @sys: the system name to search for - * @name: the name of the event to search for - * - * This returns an event with a given @name and under the system - * @sys. If @sys is NULL the first event with @name is returned. - */ -struct tep_event * -tep_find_event_by_name(struct tep_handle *tep, - const char *sys, const char *name) -{ - struct tep_event *event = NULL; - int i; - - if (tep->last_event && - strcmp(tep->last_event->name, name) == 0 && - (!sys || strcmp(tep->last_event->system, sys) == 0)) - return tep->last_event; - - for (i = 0; i < tep->nr_events; i++) { - event = tep->events[i]; - if (strcmp(event->name, name) == 0) { - if (!sys) - break; - if (strcmp(event->system, sys) == 0) - break; - } - } - if (i == tep->nr_events) - event = NULL; - - tep->last_event = event; - return event; -} - -static unsigned long long -eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) -{ - struct tep_handle *tep = event->tep; - unsigned long long val = 0; - unsigned long long left, right; - struct tep_print_arg *typearg = NULL; - struct tep_print_arg *larg; - unsigned long offset; - unsigned int field_size; - - switch (arg->type) { - case TEP_PRINT_NULL: - /* ?? */ - return 0; - case TEP_PRINT_ATOM: - return strtoull(arg->atom.atom, NULL, 0); - case TEP_PRINT_FIELD: - if (!arg->field.field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - if (!arg->field.field) - goto out_warning_field; - - } - /* must be a number */ - val = tep_read_number(tep, data + arg->field.field->offset, - arg->field.field->size); - break; - case TEP_PRINT_FLAGS: - case TEP_PRINT_SYMBOL: - case TEP_PRINT_INT_ARRAY: - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - break; - case TEP_PRINT_TYPE: - val = eval_num_arg(data, size, event, arg->typecast.item); - return eval_type(val, arg, 0); - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - return 0; - case TEP_PRINT_FUNC: { - struct trace_seq s; - trace_seq_init(&s); - val = process_defined_func(&s, data, size, event, arg); - trace_seq_destroy(&s); - return val; - } - case TEP_PRINT_OP: - if (strcmp(arg->op.op, "[") == 0) { - /* - * Arrays are special, since we don't want - * to read the arg as is. - */ - right = eval_num_arg(data, size, event, arg->op.right); - - /* handle typecasts */ - larg = arg->op.left; - while (larg->type == TEP_PRINT_TYPE) { - if (!typearg) - typearg = larg; - larg = larg->typecast.item; - } - - /* Default to long size */ - field_size = tep->long_size; - - switch (larg->type) { - case TEP_PRINT_DYNAMIC_ARRAY: - offset = tep_read_number(tep, - data + larg->dynarray.field->offset, - larg->dynarray.field->size); - if (larg->dynarray.field->elementsize) - field_size = larg->dynarray.field->elementsize; - /* - * The actual length of the dynamic array is stored - * in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - offset &= 0xffff; - offset += right; - break; - case TEP_PRINT_FIELD: - if (!larg->field.field) { - larg->field.field = - tep_find_any_field(event, larg->field.name); - if (!larg->field.field) { - arg = larg; - goto out_warning_field; - } - } - field_size = larg->field.field->elementsize; - offset = larg->field.field->offset + - right * larg->field.field->elementsize; - break; - default: - goto default_op; /* oops, all bets off */ - } - val = tep_read_number(tep, - data + offset, field_size); - if (typearg) - val = eval_type(val, typearg, 1); - break; - } else if (strcmp(arg->op.op, "?") == 0) { - left = eval_num_arg(data, size, event, arg->op.left); - arg = arg->op.right; - if (left) - val = eval_num_arg(data, size, event, arg->op.left); - else - val = eval_num_arg(data, size, event, arg->op.right); - break; - } - default_op: - left = eval_num_arg(data, size, event, arg->op.left); - right = eval_num_arg(data, size, event, arg->op.right); - switch (arg->op.op[0]) { - case '!': - switch (arg->op.op[1]) { - case 0: - val = !right; - break; - case '=': - val = left != right; - break; - default: - goto out_warning_op; - } - break; - case '~': - val = ~right; - break; - case '|': - if (arg->op.op[1]) - val = left || right; - else - val = left | right; - break; - case '&': - if (arg->op.op[1]) - val = left && right; - else - val = left & right; - break; - case '<': - switch (arg->op.op[1]) { - case 0: - val = left < right; - break; - case '<': - val = left << right; - break; - case '=': - val = left <= right; - break; - default: - goto out_warning_op; - } - break; - case '>': - switch (arg->op.op[1]) { - case 0: - val = left > right; - break; - case '>': - val = left >> right; - break; - case '=': - val = left >= right; - break; - default: - goto out_warning_op; - } - break; - case '=': - if (arg->op.op[1] != '=') - goto out_warning_op; - - val = left == right; - break; - case '-': - val = left - right; - break; - case '+': - val = left + right; - break; - case '/': - val = left / right; - break; - case '%': - val = left % right; - break; - case '*': - val = left * right; - break; - default: - goto out_warning_op; - } - break; - case TEP_PRINT_DYNAMIC_ARRAY_LEN: - offset = tep_read_number(tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - /* - * The total allocated length of the dynamic array is - * stored in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - val = (unsigned long long)(offset >> 16); - break; - case TEP_PRINT_DYNAMIC_ARRAY: - /* Without [], we pass the address to the dynamic data */ - offset = tep_read_number(tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - /* - * The total allocated length of the dynamic array is - * stored in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - offset &= 0xffff; - val = (unsigned long long)((unsigned long)data + offset); - break; - default: /* not sure what to do there */ - return 0; - } - return val; - -out_warning_op: - do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op); - return 0; - -out_warning_field: - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); - return 0; -} - -struct flag { - const char *name; - unsigned long long value; -}; - -static const struct flag flags[] = { - { "HI_SOFTIRQ", 0 }, - { "TIMER_SOFTIRQ", 1 }, - { "NET_TX_SOFTIRQ", 2 }, - { "NET_RX_SOFTIRQ", 3 }, - { "BLOCK_SOFTIRQ", 4 }, - { "IRQ_POLL_SOFTIRQ", 5 }, - { "TASKLET_SOFTIRQ", 6 }, - { "SCHED_SOFTIRQ", 7 }, - { "HRTIMER_SOFTIRQ", 8 }, - { "RCU_SOFTIRQ", 9 }, - - { "HRTIMER_NORESTART", 0 }, - { "HRTIMER_RESTART", 1 }, -}; - -static long long eval_flag(const char *flag) -{ - int i; - - /* - * Some flags in the format files do not get converted. - * If the flag is not numeric, see if it is something that - * we already know about. - */ - if (isdigit(flag[0])) - return strtoull(flag, NULL, 0); - - for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++) - if (strcmp(flags[i].name, flag) == 0) - return flags[i].value; - - return -1LL; -} - -static void print_str_to_seq(struct trace_seq *s, const char *format, - int len_arg, const char *str) -{ - if (len_arg >= 0) - trace_seq_printf(s, format, len_arg, str); - else - trace_seq_printf(s, format, str); -} - -static void print_bitmask_to_seq(struct tep_handle *tep, - struct trace_seq *s, const char *format, - int len_arg, const void *data, int size) -{ - int nr_bits = size * 8; - int str_size = (nr_bits + 3) / 4; - int len = 0; - char buf[3]; - char *str; - int index; - int i; - - /* - * The kernel likes to put in commas every 32 bits, we - * can do the same. - */ - str_size += (nr_bits - 1) / 32; - - str = malloc(str_size + 1); - if (!str) { - do_warning("%s: not enough memory!", __func__); - return; - } - str[str_size] = 0; - - /* Start out with -2 for the two chars per byte */ - for (i = str_size - 2; i >= 0; i -= 2) { - /* - * data points to a bit mask of size bytes. - * In the kernel, this is an array of long words, thus - * endianness is very important. - */ - if (tep->file_bigendian) - index = size - (len + 1); - else - index = len; - - snprintf(buf, 3, "%02x", *((unsigned char *)data + index)); - memcpy(str + i, buf, 2); - len++; - if (!(len & 3) && i > 0) { - i--; - str[i] = ','; - } - } - - if (len_arg >= 0) - trace_seq_printf(s, format, len_arg, str); - else - trace_seq_printf(s, format, str); - - free(str); -} - -static void print_str_arg(struct trace_seq *s, void *data, int size, - struct tep_event *event, const char *format, - int len_arg, struct tep_print_arg *arg) -{ - struct tep_handle *tep = event->tep; - struct tep_print_flag_sym *flag; - struct tep_format_field *field; - struct printk_map *printk; - long long val, fval; - unsigned long long addr; - char *str; - unsigned char *hex; - int print; - int i, len; - - switch (arg->type) { - case TEP_PRINT_NULL: - /* ?? */ - return; - case TEP_PRINT_ATOM: - print_str_to_seq(s, format, len_arg, arg->atom.atom); - return; - case TEP_PRINT_FIELD: - field = arg->field.field; - if (!field) { - field = tep_find_any_field(event, arg->field.name); - if (!field) { - str = arg->field.name; - goto out_warning_field; - } - arg->field.field = field; - } - /* Zero sized fields, mean the rest of the data */ - len = field->size ? : size - field->offset; - - /* - * Some events pass in pointers. If this is not an array - * and the size is the same as long_size, assume that it - * is a pointer. - */ - if (!(field->flags & TEP_FIELD_IS_ARRAY) && - field->size == tep->long_size) { - - /* Handle heterogeneous recording and processing - * architectures - * - * CASE I: - * Traces recorded on 32-bit devices (32-bit - * addressing) and processed on 64-bit devices: - * In this case, only 32 bits should be read. - * - * CASE II: - * Traces recorded on 64 bit devices and processed - * on 32-bit devices: - * In this case, 64 bits must be read. - */ - addr = (tep->long_size == 8) ? - *(unsigned long long *)(data + field->offset) : - (unsigned long long)*(unsigned int *)(data + field->offset); - - /* Check if it matches a print format */ - printk = find_printk(tep, addr); - if (printk) - trace_seq_puts(s, printk->printk); - else - trace_seq_printf(s, "%llx", addr); - break; - } - str = malloc(len + 1); - if (!str) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return; - } - memcpy(str, data + field->offset, len); - str[len] = 0; - print_str_to_seq(s, format, len_arg, str); - free(str); - break; - case TEP_PRINT_FLAGS: - val = eval_num_arg(data, size, event, arg->flags.field); - print = 0; - for (flag = arg->flags.flags; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (!val && fval < 0) { - print_str_to_seq(s, format, len_arg, flag->str); - break; - } - if (fval > 0 && (val & fval) == fval) { - if (print && arg->flags.delim) - trace_seq_puts(s, arg->flags.delim); - print_str_to_seq(s, format, len_arg, flag->str); - print = 1; - val &= ~fval; - } - } - if (val) { - if (print && arg->flags.delim) - trace_seq_puts(s, arg->flags.delim); - trace_seq_printf(s, "0x%llx", val); - } - break; - case TEP_PRINT_SYMBOL: - val = eval_num_arg(data, size, event, arg->symbol.field); - for (flag = arg->symbol.symbols; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (val == fval) { - print_str_to_seq(s, format, len_arg, flag->str); - break; - } - } - if (!flag) - trace_seq_printf(s, "0x%llx", val); - break; - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) { - unsigned long offset; - offset = tep_read_number(tep, - data + arg->hex.field->dynarray.field->offset, - arg->hex.field->dynarray.field->size); - hex = data + (offset & 0xffff); - } else { - field = arg->hex.field->field.field; - if (!field) { - str = arg->hex.field->field.name; - field = tep_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->hex.field->field.field = field; - } - hex = data + field->offset; - } - len = eval_num_arg(data, size, event, arg->hex.size); - for (i = 0; i < len; i++) { - if (i && arg->type == TEP_PRINT_HEX) - trace_seq_putc(s, ' '); - trace_seq_printf(s, "%02x", hex[i]); - } - break; - - case TEP_PRINT_INT_ARRAY: { - void *num; - int el_size; - - if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) { - unsigned long offset; - struct tep_format_field *field = - arg->int_array.field->dynarray.field; - offset = tep_read_number(tep, - data + field->offset, - field->size); - num = data + (offset & 0xffff); - } else { - field = arg->int_array.field->field.field; - if (!field) { - str = arg->int_array.field->field.name; - field = tep_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->int_array.field->field.field = field; - } - num = data + field->offset; - } - len = eval_num_arg(data, size, event, arg->int_array.count); - el_size = eval_num_arg(data, size, event, - arg->int_array.el_size); - for (i = 0; i < len; i++) { - if (i) - trace_seq_putc(s, ' '); - - if (el_size == 1) { - trace_seq_printf(s, "%u", *(uint8_t *)num); - } else if (el_size == 2) { - trace_seq_printf(s, "%u", *(uint16_t *)num); - } else if (el_size == 4) { - trace_seq_printf(s, "%u", *(uint32_t *)num); - } else if (el_size == 8) { - trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); - } else { - trace_seq_printf(s, "BAD SIZE:%d 0x%x", - el_size, *(uint8_t *)num); - el_size = 1; - } - - num += el_size; - } - break; - } - case TEP_PRINT_TYPE: - break; - case TEP_PRINT_STRING: { - int str_offset; - - if (!arg->string.field) - arg->string.field = tep_find_any_field(event, arg->string.string); - if (!arg->string.field) - break; - - str_offset = data2host4(tep, - *(unsigned int *)(data + arg->string.field->offset)); - str_offset &= 0xffff; - if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) - str_offset += arg->string.field->offset + arg->string.field->size; - print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); - break; - } - case TEP_PRINT_BSTRING: - print_str_to_seq(s, format, len_arg, arg->string.string); - break; - case TEP_PRINT_BITMASK: { - int bitmask_offset; - int bitmask_size; - - if (!arg->bitmask.field) - arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); - if (!arg->bitmask.field) - break; - bitmask_offset = data2host4(tep, - *(unsigned int *)(data + arg->bitmask.field->offset)); - bitmask_size = bitmask_offset >> 16; - bitmask_offset &= 0xffff; - if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) - bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; - print_bitmask_to_seq(tep, s, format, len_arg, - data + bitmask_offset, bitmask_size); - break; - } - case TEP_PRINT_OP: - /* - * The only op for string should be ? : - */ - if (arg->op.op[0] != '?') - return; - val = eval_num_arg(data, size, event, arg->op.left); - if (val) - print_str_arg(s, data, size, event, - format, len_arg, arg->op.right->op.left); - else - print_str_arg(s, data, size, event, - format, len_arg, arg->op.right->op.right); - break; - case TEP_PRINT_FUNC: - process_defined_func(s, data, size, event, arg); - break; - default: - /* well... */ - break; - } - - return; - -out_warning_field: - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); -} - -static unsigned long long -process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - struct tep_function_handler *func_handle = arg->func.func; - struct func_params *param; - unsigned long long *args; - unsigned long long ret; - struct tep_print_arg *farg; - struct trace_seq str; - struct save_str { - struct save_str *next; - char *str; - } *strings = NULL, *string; - int i; - - if (!func_handle->nr_args) { - ret = (*func_handle->func)(s, NULL); - goto out; - } - - farg = arg->func.args; - param = func_handle->params; - - ret = ULLONG_MAX; - args = malloc(sizeof(*args) * func_handle->nr_args); - if (!args) - goto out; - - for (i = 0; i < func_handle->nr_args; i++) { - switch (param->type) { - case TEP_FUNC_ARG_INT: - case TEP_FUNC_ARG_LONG: - case TEP_FUNC_ARG_PTR: - args[i] = eval_num_arg(data, size, event, farg); - break; - case TEP_FUNC_ARG_STRING: - trace_seq_init(&str); - print_str_arg(&str, data, size, event, "%s", -1, farg); - trace_seq_terminate(&str); - string = malloc(sizeof(*string)); - if (!string) { - do_warning_event(event, "%s(%d): malloc str", - __func__, __LINE__); - goto out_free; - } - string->next = strings; - string->str = strdup(str.buffer); - if (!string->str) { - free(string); - do_warning_event(event, "%s(%d): malloc str", - __func__, __LINE__); - goto out_free; - } - args[i] = (uintptr_t)string->str; - strings = string; - trace_seq_destroy(&str); - break; - default: - /* - * Something went totally wrong, this is not - * an input error, something in this code broke. - */ - do_warning_event(event, "Unexpected end of arguments\n"); - goto out_free; - } - farg = farg->next; - param = param->next; - } - - ret = (*func_handle->func)(s, args); -out_free: - free(args); - while (strings) { - string = strings; - strings = string->next; - free(string->str); - free(string); - } - - out: - /* TBD : handle return type here */ - return ret; -} - -static void free_args(struct tep_print_arg *args) -{ - struct tep_print_arg *next; - - while (args) { - next = args->next; - - free_arg(args); - args = next; - } -} - -static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) -{ - struct tep_handle *tep = event->tep; - struct tep_format_field *field, *ip_field; - struct tep_print_arg *args, *arg, **next; - unsigned long long ip, val; - char *ptr; - void *bptr; - int vsize = 0; - - field = tep->bprint_buf_field; - ip_field = tep->bprint_ip_field; - - if (!field) { - field = tep_find_field(event, "buf"); - if (!field) { - do_warning_event(event, "can't find buffer field for binary printk"); - return NULL; - } - ip_field = tep_find_field(event, "ip"); - if (!ip_field) { - do_warning_event(event, "can't find ip field for binary printk"); - return NULL; - } - tep->bprint_buf_field = field; - tep->bprint_ip_field = ip_field; - } - - ip = tep_read_number(tep, data + ip_field->offset, ip_field->size); - - /* - * The first arg is the IP pointer. - */ - args = alloc_arg(); - if (!args) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - return NULL; - } - arg = args; - arg->next = NULL; - next = &arg->next; - - arg->type = TEP_PRINT_ATOM; - - if (asprintf(&arg->atom.atom, "%lld", ip) < 0) - goto out_free; - - /* skip the first "%ps: " */ - for (ptr = fmt + 5, bptr = data + field->offset; - bptr < data + size && *ptr; ptr++) { - int ls = 0; - - if (*ptr == '%') { - process_again: - ptr++; - switch (*ptr) { - case '%': - break; - case 'l': - ls++; - goto process_again; - case 'L': - ls = 2; - goto process_again; - case '0' ... '9': - goto process_again; - case '.': - goto process_again; - case 'z': - case 'Z': - ls = 1; - goto process_again; - case 'p': - ls = 1; - if (isalnum(ptr[1])) { - ptr++; - /* Check for special pointers */ - switch (*ptr) { - case 's': - case 'S': - case 'x': - break; - case 'f': - case 'F': - /* - * Pre-5.5 kernels use %pf and - * %pF for printing symbols - * while kernels since 5.5 use - * %pfw for fwnodes. So check - * %p[fF] isn't followed by 'w'. - */ - if (ptr[1] != 'w') - break; - /* fall through */ - default: - /* - * Older kernels do not process - * dereferenced pointers. - * Only process if the pointer - * value is a printable. - */ - if (isprint(*(char *)bptr)) - goto process_string; - } - } - /* fall through */ - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - switch (ls) { - case 0: - vsize = 4; - break; - case 1: - vsize = tep->long_size; - break; - case 2: - vsize = 8; - break; - default: - vsize = ls; /* ? */ - break; - } - /* fall through */ - case '*': - if (*ptr == '*') - vsize = 4; - - /* the pointers are always 4 bytes aligned */ - bptr = (void *)(((unsigned long)bptr + 3) & - ~3); - val = tep_read_number(tep, bptr, vsize); - bptr += vsize; - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - goto out_free; - } - arg->next = NULL; - arg->type = TEP_PRINT_ATOM; - if (asprintf(&arg->atom.atom, "%lld", val) < 0) { - free(arg); - goto out_free; - } - *next = arg; - next = &arg->next; - /* - * The '*' case means that an arg is used as the length. - * We need to continue to figure out for what. - */ - if (*ptr == '*') - goto process_again; - - break; - case 's': - process_string: - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - goto out_free; - } - arg->next = NULL; - arg->type = TEP_PRINT_BSTRING; - arg->string.string = strdup(bptr); - if (!arg->string.string) - goto out_free; - bptr += strlen(bptr) + 1; - *next = arg; - next = &arg->next; - default: - break; - } - } - } - - return args; - -out_free: - free_args(args); - return NULL; -} - -static char * -get_bprint_format(void *data, int size __maybe_unused, - struct tep_event *event) -{ - struct tep_handle *tep = event->tep; - unsigned long long addr; - struct tep_format_field *field; - struct printk_map *printk; - char *format; - - field = tep->bprint_fmt_field; - - if (!field) { - field = tep_find_field(event, "fmt"); - if (!field) { - do_warning_event(event, "can't find format field for binary printk"); - return NULL; - } - tep->bprint_fmt_field = field; - } - - addr = tep_read_number(tep, data + field->offset, field->size); - - printk = find_printk(tep, addr); - if (!printk) { - if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0) - return NULL; - return format; - } - - if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0) - return NULL; - - return format; -} - -static int print_mac_arg(struct trace_seq *s, const char *format, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; - bool reverse = false; - unsigned char *buf; - int ret = 0; - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return 0; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", - arg->type); - return 0; - } - - if (format[0] == 'm') { - fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; - } else if (format[0] == 'M' && format[1] == 'F') { - fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x"; - ret++; - } - if (format[1] == 'R') { - reverse = true; - ret++; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - if (arg->field.field->size != 6) { - trace_seq_printf(s, "INVALIDMAC"); - return ret; - } - - buf = data + arg->field.field->offset; - if (reverse) - trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]); - else - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - - return ret; -} - -static int parse_ip4_print_args(struct tep_handle *tep, - const char *ptr, bool *reverse) -{ - int ret = 0; - - *reverse = false; - - /* hnbl */ - switch (*ptr) { - case 'h': - if (tep->file_bigendian) - *reverse = false; - else - *reverse = true; - ret++; - break; - case 'l': - *reverse = true; - ret++; - break; - case 'n': - case 'b': - ret++; - /* fall through */ - default: - *reverse = false; - break; - } - - return ret; -} - -static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf) -{ - const char *fmt; - - if (i == 'i') - fmt = "%03d.%03d.%03d.%03d"; - else - fmt = "%d.%d.%d.%d"; - - if (reverse) - trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]); - else - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); - -} - -static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) -{ - return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | - (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; -} - -static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) -{ - return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); -} - -static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) -{ - int i, j, range; - unsigned char zerolength[8]; - int longest = 1; - int colonpos = -1; - uint16_t word; - uint8_t hi, lo; - bool needcolon = false; - bool useIPv4; - struct in6_addr in6; - - memcpy(&in6, addr, sizeof(struct in6_addr)); - - useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); - - memset(zerolength, 0, sizeof(zerolength)); - - if (useIPv4) - range = 6; - else - range = 8; - - /* find position of longest 0 run */ - for (i = 0; i < range; i++) { - for (j = i; j < range; j++) { - if (in6.s6_addr16[j] != 0) - break; - zerolength[i]++; - } - } - for (i = 0; i < range; i++) { - if (zerolength[i] > longest) { - longest = zerolength[i]; - colonpos = i; - } - } - if (longest == 1) /* don't compress a single 0 */ - colonpos = -1; - - /* emit address */ - for (i = 0; i < range; i++) { - if (i == colonpos) { - if (needcolon || i == 0) - trace_seq_printf(s, ":"); - trace_seq_printf(s, ":"); - needcolon = false; - i += longest - 1; - continue; - } - if (needcolon) { - trace_seq_printf(s, ":"); - needcolon = false; - } - /* hex u16 without leading 0s */ - word = ntohs(in6.s6_addr16[i]); - hi = word >> 8; - lo = word & 0xff; - if (hi) - trace_seq_printf(s, "%x%02x", hi, lo); - else - trace_seq_printf(s, "%x", lo); - - needcolon = true; - } - - if (useIPv4) { - if (needcolon) - trace_seq_printf(s, ":"); - print_ip4_addr(s, 'I', false, &in6.s6_addr[12]); - } - - return; -} - -static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) -{ - int j; - - for (j = 0; j < 16; j += 2) { - trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); - if (i == 'I' && j < 14) - trace_seq_printf(s, ":"); - } -} - -/* - * %pi4 print an IPv4 address with leading zeros - * %pI4 print an IPv4 address without leading zeros - * %pi6 print an IPv6 address without colons - * %pI6 print an IPv6 address with colons - * %pI6c print an IPv6 address in compressed form with colons - * %pISpc print an IP address based on sockaddr; p adds port. - */ -static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - bool reverse = false; - unsigned char *buf; - int ret; - - ret = parse_ip4_print_args(event->tep, ptr, &reverse); - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - - buf = data + arg->field.field->offset; - - if (arg->field.field->size != 4) { - trace_seq_printf(s, "INVALIDIPv4"); - return ret; - } - - print_ip4_addr(s, i, reverse, buf); - return ret; - -} - -static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char have_c = 0; - unsigned char *buf; - int rc = 0; - - /* pI6c */ - if (i == 'I' && *ptr == 'c') { - have_c = 1; - ptr++; - rc++; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return rc; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return rc; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return rc; - } - } - - buf = data + arg->field.field->offset; - - if (arg->field.field->size != 16) { - trace_seq_printf(s, "INVALIDIPv6"); - return rc; - } - - if (have_c) - print_ip6c_addr(s, buf); - else - print_ip6_addr(s, i, buf); - - return rc; -} - -static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char have_c = 0, have_p = 0; - unsigned char *buf; - struct sockaddr_storage *sa; - bool reverse = false; - int rc = 0; - int ret; - - /* pISpc */ - if (i == 'I') { - if (*ptr == 'p') { - have_p = 1; - ptr++; - rc++; - } - if (*ptr == 'c') { - have_c = 1; - ptr++; - rc++; - } - } - ret = parse_ip4_print_args(event->tep, ptr, &reverse); - ptr += ret; - rc += ret; - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return rc; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return rc; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return rc; - } - } - - sa = (struct sockaddr_storage *) (data + arg->field.field->offset); - - if (sa->ss_family == AF_INET) { - struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; - - if (arg->field.field->size < sizeof(struct sockaddr_in)) { - trace_seq_printf(s, "INVALIDIPv4"); - return rc; - } - - print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr); - if (have_p) - trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); - - - } else if (sa->ss_family == AF_INET6) { - struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; - - if (arg->field.field->size < sizeof(struct sockaddr_in6)) { - trace_seq_printf(s, "INVALIDIPv6"); - return rc; - } - - if (have_p) - trace_seq_printf(s, "["); - - buf = (unsigned char *) &sa6->sin6_addr; - if (have_c) - print_ip6c_addr(s, buf); - else - print_ip6_addr(s, i, buf); - - if (have_p) - trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port)); - } - - return rc; -} - -static int print_ip_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char i = *ptr; /* 'i' or 'I' */ - int rc = 1; - - /* IP version */ - ptr++; - - switch (*ptr) { - case '4': - rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg); - break; - case '6': - rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg); - break; - case 'S': - rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg); - break; - default: - return 0; - } - - return rc; -} - -static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; -static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - -static int print_uuid_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - const int *index = uuid_index; - char *format = "%02x"; - int ret = 0; - char *buf; - int i; - - switch (*(ptr + 1)) { - case 'L': - format = "%02X"; - /* fall through */ - case 'l': - index = guid_index; - ret++; - break; - case 'B': - format = "%02X"; - /* fall through */ - case 'b': - ret++; - break; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - - if (arg->field.field->size != 16) { - trace_seq_printf(s, "INVALIDUUID"); - return ret; - } - - buf = data + arg->field.field->offset; - - for (i = 0; i < 16; i++) { - trace_seq_printf(s, format, buf[index[i]] & 0xff); - switch (i) { - case 3: - case 5: - case 7: - case 9: - trace_seq_printf(s, "-"); - break; - } - } - - return ret; -} - -static int print_raw_buff_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg, int print_len) -{ - int plen = print_len; - char *delim = " "; - int ret = 0; - char *buf; - int i; - unsigned long offset; - int arr_len; - - switch (*(ptr + 1)) { - case 'C': - delim = ":"; - ret++; - break; - case 'D': - delim = "-"; - ret++; - break; - case 'N': - delim = ""; - ret++; - break; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - offset = tep_read_number(event->tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - arr_len = (unsigned long long)(offset >> 16); - buf = data + (offset & 0xffff); - - if (arr_len < plen) - plen = arr_len; - - if (plen < 1) - return ret; - - trace_seq_printf(s, "%02x", buf[0] & 0xff); - for (i = 1; i < plen; i++) - trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff); - - return ret; -} - -static int is_printable_array(char *p, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len && p[i]; i++) - if (!isprint(p[i]) && !isspace(p[i])) - return 0; - return 1; -} - -void tep_print_field(struct trace_seq *s, void *data, - struct tep_format_field *field) -{ - unsigned long long val; - unsigned int offset, len, i; - struct tep_handle *tep = field->event->tep; - - if (field->flags & TEP_FIELD_IS_ARRAY) { - offset = field->offset; - len = field->size; - if (field->flags & TEP_FIELD_IS_DYNAMIC) { - val = tep_read_number(tep, data + offset, len); - offset = val; - len = offset >> 16; - offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; - } - if (field->flags & TEP_FIELD_IS_STRING && - is_printable_array(data + offset, len)) { - trace_seq_printf(s, "%s", (char *)data + offset); - } else { - trace_seq_puts(s, "ARRAY["); - for (i = 0; i < len; i++) { - if (i) - trace_seq_puts(s, ", "); - trace_seq_printf(s, "%02x", - *((unsigned char *)data + offset + i)); - } - trace_seq_putc(s, ']'); - field->flags &= ~TEP_FIELD_IS_STRING; - } - } else { - val = tep_read_number(tep, data + field->offset, - field->size); - if (field->flags & TEP_FIELD_IS_POINTER) { - trace_seq_printf(s, "0x%llx", val); - } else if (field->flags & TEP_FIELD_IS_SIGNED) { - switch (field->size) { - case 4: - /* - * If field is long then print it in hex. - * A long usually stores pointers. - */ - if (field->flags & TEP_FIELD_IS_LONG) - trace_seq_printf(s, "0x%x", (int)val); - else - trace_seq_printf(s, "%d", (int)val); - break; - case 2: - trace_seq_printf(s, "%2d", (short)val); - break; - case 1: - trace_seq_printf(s, "%1d", (char)val); - break; - default: - trace_seq_printf(s, "%lld", val); - } - } else { - if (field->flags & TEP_FIELD_IS_LONG) - trace_seq_printf(s, "0x%llx", val); - else - trace_seq_printf(s, "%llu", val); - } - } -} - -void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event *event) -{ - struct tep_format_field *field; - - field = event->format.fields; - while (field) { - trace_seq_printf(s, " %s=", field->name); - tep_print_field(s, data, field); - field = field->next; - } -} - -static int print_function(struct trace_seq *s, const char *format, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - struct func_map *func; - unsigned long long val; - - val = eval_num_arg(data, size, event, arg); - func = find_func(event->tep, val); - if (func) { - trace_seq_puts(s, func->func); - if (*format == 'F' || *format == 'S') - trace_seq_printf(s, "+0x%llx", val - func->addr); - } else { - if (event->tep->long_size == 4) - trace_seq_printf(s, "0x%lx", (long)val); - else - trace_seq_printf(s, "0x%llx", (long long)val); - } - - return 0; -} - -static int print_arg_pointer(struct trace_seq *s, const char *format, int plen, - void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - unsigned long long val; - int ret = 1; - - if (arg->type == TEP_PRINT_BSTRING) { - trace_seq_puts(s, arg->string.string); - return 0; - } - while (*format) { - if (*format == 'p') { - format++; - break; - } - format++; - } - - switch (*format) { - case 'F': - case 'f': - case 'S': - case 's': - ret += print_function(s, format, data, size, event, arg); - break; - case 'M': - case 'm': - ret += print_mac_arg(s, format, data, size, event, arg); - break; - case 'I': - case 'i': - ret += print_ip_arg(s, format, data, size, event, arg); - break; - case 'U': - ret += print_uuid_arg(s, format, data, size, event, arg); - break; - case 'h': - ret += print_raw_buff_arg(s, format, data, size, event, arg, plen); - break; - default: - ret = 0; - val = eval_num_arg(data, size, event, arg); - trace_seq_printf(s, "%p", (void *)(intptr_t)val); - break; - } - - return ret; - -} - -static int print_arg_number(struct trace_seq *s, const char *format, int plen, - void *data, int size, int ls, - struct tep_event *event, struct tep_print_arg *arg) -{ - unsigned long long val; - - val = eval_num_arg(data, size, event, arg); - - switch (ls) { - case -2: - if (plen >= 0) - trace_seq_printf(s, format, plen, (char)val); - else - trace_seq_printf(s, format, (char)val); - break; - case -1: - if (plen >= 0) - trace_seq_printf(s, format, plen, (short)val); - else - trace_seq_printf(s, format, (short)val); - break; - case 0: - if (plen >= 0) - trace_seq_printf(s, format, plen, (int)val); - else - trace_seq_printf(s, format, (int)val); - break; - case 1: - if (plen >= 0) - trace_seq_printf(s, format, plen, (long)val); - else - trace_seq_printf(s, format, (long)val); - break; - case 2: - if (plen >= 0) - trace_seq_printf(s, format, plen, (long long)val); - else - trace_seq_printf(s, format, (long long)val); - break; - default: - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - return 0; -} - - -static void print_arg_string(struct trace_seq *s, const char *format, int plen, - void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - struct trace_seq p; - - /* Use helper trace_seq */ - trace_seq_init(&p); - print_str_arg(&p, data, size, event, - format, plen, arg); - trace_seq_terminate(&p); - trace_seq_puts(s, p.buffer); - trace_seq_destroy(&p); -} - -static int parse_arg_format_pointer(const char *format) -{ - int ret = 0; - int index; - int loop; - - switch (*format) { - case 'F': - case 'S': - case 'f': - case 's': - ret++; - break; - case 'M': - case 'm': - /* [mM]R , [mM]F */ - switch (format[1]) { - case 'R': - case 'F': - ret++; - break; - } - ret++; - break; - case 'I': - case 'i': - index = 2; - loop = 1; - switch (format[1]) { - case 'S': - /*[S][pfs]*/ - while (loop) { - switch (format[index]) { - case 'p': - case 'f': - case 's': - ret++; - index++; - break; - default: - loop = 0; - break; - } - } - /* fall through */ - case '4': - /* [4S][hnbl] */ - switch (format[index]) { - case 'h': - case 'n': - case 'l': - case 'b': - ret++; - index++; - break; - } - if (format[1] == '4') { - ret++; - break; - } - /* fall through */ - case '6': - /* [6S]c */ - if (format[index] == 'c') - ret++; - ret++; - break; - } - ret++; - break; - case 'U': - switch (format[1]) { - case 'L': - case 'l': - case 'B': - case 'b': - ret++; - break; - } - ret++; - break; - case 'h': - switch (format[1]) { - case 'C': - case 'D': - case 'N': - ret++; - break; - } - ret++; - break; - default: - break; - } - - return ret; -} - -static void free_parse_args(struct tep_print_parse *arg) -{ - struct tep_print_parse *del; - - while (arg) { - del = arg; - arg = del->next; - free(del->format); - free(del); - } -} - -static int parse_arg_add(struct tep_print_parse **parse, char *format, - enum tep_print_parse_type type, - struct tep_print_arg *arg, - struct tep_print_arg *len_as_arg, - int ls) -{ - struct tep_print_parse *parg = NULL; - - parg = calloc(1, sizeof(*parg)); - if (!parg) - goto error; - parg->format = strdup(format); - if (!parg->format) - goto error; - parg->type = type; - parg->arg = arg; - parg->len_as_arg = len_as_arg; - parg->ls = ls; - *parse = parg; - return 0; -error: - if (parg) { - free(parg->format); - free(parg); - } - return -1; -} - -static int parse_arg_format(struct tep_print_parse **parse, - struct tep_event *event, - const char *format, struct tep_print_arg **arg) -{ - struct tep_print_arg *len_arg = NULL; - char print_format[32]; - const char *start = format; - int ret = 0; - int ls = 0; - int res; - int len; - - format++; - ret++; - for (; *format; format++) { - switch (*format) { - case '#': - /* FIXME: need to handle properly */ - break; - case 'h': - ls--; - break; - case 'l': - ls++; - break; - case 'L': - ls = 2; - break; - case '.': - case 'z': - case 'Z': - case '0' ... '9': - case '-': - break; - case '*': - /* The argument is the length. */ - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - if (len_arg) { - do_warning_event(event, "argument already matched"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - len_arg = *arg; - *arg = (*arg)->next; - break; - case 'p': - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - res = parse_arg_format_pointer(format + 1); - if (res > 0) { - format += res; - ret += res; - } - len = ((unsigned long)format + 1) - - (unsigned long)start; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - memcpy(print_format, start, len); - print_format[len] = 0; - - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_POINTER, *arg, len_arg, ls); - *arg = (*arg)->next; - ret++; - return ret; - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - - len = ((unsigned long)format + 1) - - (unsigned long)start; - - /* should never happen */ - if (len > 30) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - memcpy(print_format, start, len); - print_format[len] = 0; - - if (event->tep->long_size == 8 && ls == 1 && - sizeof(long) != 8) { - char *p; - - /* make %l into %ll */ - if (ls == 1 && (p = strchr(print_format, 'l'))) - memmove(p+1, p, strlen(p)+1); - ls = 2; - } - if (ls < -2 || ls > 2) { - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls); - *arg = (*arg)->next; - ret++; - return ret; - case 's': - if (!*arg) { - do_warning_event(event, "no matching argument"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - - len = ((unsigned long)format + 1) - - (unsigned long)start; - - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - - memcpy(print_format, start, len); - print_format[len] = 0; - - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_STRING, *arg, len_arg, 0); - *arg = (*arg)->next; - ret++; - return ret; - default: - snprintf(print_format, 32, ">%c<", *format); - parse_arg_add(parse, print_format, - PRINT_FMT_STRING, NULL, NULL, 0); - ret++; - return ret; - } - ret++; - } - -out_failed: - return ret; - -} - -static int parse_arg_string(struct tep_print_parse **parse, const char *format) -{ - struct trace_seq s; - int ret = 0; - - trace_seq_init(&s); - for (; *format; format++) { - if (*format == '\\') { - format++; - ret++; - switch (*format) { - case 'n': - trace_seq_putc(&s, '\n'); - break; - case 't': - trace_seq_putc(&s, '\t'); - break; - case 'r': - trace_seq_putc(&s, '\r'); - break; - case '\\': - trace_seq_putc(&s, '\\'); - break; - default: - trace_seq_putc(&s, *format); - break; - } - } else if (*format == '%') { - if (*(format + 1) == '%') { - trace_seq_putc(&s, '%'); - format++; - ret++; - } else - break; - } else - trace_seq_putc(&s, *format); - - ret++; - } - trace_seq_terminate(&s); - parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0); - trace_seq_destroy(&s); - - return ret; -} - -static struct tep_print_parse * -parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg) -{ - struct tep_print_parse *parse_ret = NULL; - struct tep_print_parse **parse = NULL; - int ret; - int len; - - len = strlen(format); - while (*format) { - if (!parse_ret) - parse = &parse_ret; - if (*format == '%' && *(format + 1) != '%') - ret = parse_arg_format(parse, event, format, &arg); - else - ret = parse_arg_string(parse, format); - if (*parse) - parse = &((*parse)->next); - - len -= ret; - if (len > 0) - format += ret; - else - break; - } - return parse_ret; -} - -static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s, - void *data, int size, struct tep_event *event) -{ - int len_arg; - - while (parse) { - if (parse->len_as_arg) - len_arg = eval_num_arg(data, size, event, parse->len_as_arg); - switch (parse->type) { - case PRINT_FMT_ARG_DIGIT: - print_arg_number(s, parse->format, - parse->len_as_arg ? len_arg : -1, data, - size, parse->ls, event, parse->arg); - break; - case PRINT_FMT_ARG_POINTER: - print_arg_pointer(s, parse->format, - parse->len_as_arg ? len_arg : 1, - data, size, event, parse->arg); - break; - case PRINT_FMT_ARG_STRING: - print_arg_string(s, parse->format, - parse->len_as_arg ? len_arg : -1, - data, size, event, parse->arg); - break; - case PRINT_FMT_STRING: - default: - trace_seq_printf(s, "%s", parse->format); - break; - } - parse = parse->next; - } -} - -static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) -{ - struct tep_print_parse *parse = event->print_fmt.print_cache; - struct tep_print_arg *args = NULL; - char *bprint_fmt = NULL; - - if (event->flags & TEP_EVENT_FL_FAILED) { - trace_seq_printf(s, "[FAILED TO PARSE]"); - tep_print_fields(s, data, size, event); - return; - } - - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - bprint_fmt = get_bprint_format(data, size, event); - args = make_bprint_args(bprint_fmt, data, size, event); - parse = parse_args(event, bprint_fmt, args); - } - - print_event_cache(parse, s, data, size, event); - - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - free_parse_args(parse); - free_args(args); - free(bprint_fmt); - } -} - -/* - * This parses out the Latency format (interrupts disabled, - * need rescheduling, in hard/soft interrupt, preempt count - * and lock depth) and places it into the trace_seq. - */ -static void data_latency_format(struct tep_handle *tep, struct trace_seq *s, - char *format, struct tep_record *record) -{ - static int check_lock_depth = 1; - static int check_migrate_disable = 1; - static int lock_depth_exists; - static int migrate_disable_exists; - unsigned int lat_flags; - struct trace_seq sq; - unsigned int pc; - int lock_depth = 0; - int migrate_disable = 0; - int hardirq; - int softirq; - void *data = record->data; - - trace_seq_init(&sq); - lat_flags = parse_common_flags(tep, data); - pc = parse_common_pc(tep, data); - /* lock_depth may not always exist */ - if (lock_depth_exists) - lock_depth = parse_common_lock_depth(tep, data); - else if (check_lock_depth) { - lock_depth = parse_common_lock_depth(tep, data); - if (lock_depth < 0) - check_lock_depth = 0; - else - lock_depth_exists = 1; - } - - /* migrate_disable may not always exist */ - if (migrate_disable_exists) - migrate_disable = parse_common_migrate_disable(tep, data); - else if (check_migrate_disable) { - migrate_disable = parse_common_migrate_disable(tep, data); - if (migrate_disable < 0) - check_migrate_disable = 0; - else - migrate_disable_exists = 1; - } - - hardirq = lat_flags & TRACE_FLAG_HARDIRQ; - softirq = lat_flags & TRACE_FLAG_SOFTIRQ; - - trace_seq_printf(&sq, "%c%c%c", - (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? - 'X' : '.', - (lat_flags & TRACE_FLAG_NEED_RESCHED) ? - 'N' : '.', - (hardirq && softirq) ? 'H' : - hardirq ? 'h' : softirq ? 's' : '.'); - - if (pc) - trace_seq_printf(&sq, "%x", pc); - else - trace_seq_printf(&sq, "."); - - if (migrate_disable_exists) { - if (migrate_disable < 0) - trace_seq_printf(&sq, "."); - else - trace_seq_printf(&sq, "%d", migrate_disable); - } - - if (lock_depth_exists) { - if (lock_depth < 0) - trace_seq_printf(&sq, "."); - else - trace_seq_printf(&sq, "%d", lock_depth); - } - - if (sq.state == TRACE_SEQ__MEM_ALLOC_FAILED) { - s->state = TRACE_SEQ__MEM_ALLOC_FAILED; - return; - } - - trace_seq_terminate(&sq); - trace_seq_puts(s, sq.buffer); - trace_seq_destroy(&sq); - trace_seq_terminate(s); -} - -/** - * tep_data_type - parse out the given event type - * @tep: a handle to the trace event parser context - * @rec: the record to read from - * - * This returns the event id from the @rec. - */ -int tep_data_type(struct tep_handle *tep, struct tep_record *rec) -{ - return trace_parse_common_type(tep, rec->data); -} - -/** - * tep_data_pid - parse the PID from record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the PID from a record. - */ -int tep_data_pid(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_pid(tep, rec->data); -} - -/** - * tep_data_preempt_count - parse the preempt count from the record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the preempt count from a record. - */ -int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_pc(tep, rec->data); -} - -/** - * tep_data_flags - parse the latency flags from the record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the latency flags from a record. - * - * Use trace_flag_type enum for the flags (see event-parse.h). - */ -int tep_data_flags(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_flags(tep, rec->data); -} - -/** - * tep_data_comm_from_pid - return the command line from PID - * @tep: a handle to the trace event parser context - * @pid: the PID of the task to search for - * - * This returns a pointer to the command line that has the given - * @pid. - */ -const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid) -{ - const char *comm; - - comm = find_cmdline(tep, pid); - return comm; -} - -static struct tep_cmdline * -pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next) -{ - struct cmdline_list *cmdlist = (struct cmdline_list *)next; - - if (cmdlist) - cmdlist = cmdlist->next; - else - cmdlist = tep->cmdlist; - - while (cmdlist && strcmp(cmdlist->comm, comm) != 0) - cmdlist = cmdlist->next; - - return (struct tep_cmdline *)cmdlist; -} - -/** - * tep_data_pid_from_comm - return the pid from a given comm - * @tep: a handle to the trace event parser context - * @comm: the cmdline to find the pid from - * @next: the cmdline structure to find the next comm - * - * This returns the cmdline structure that holds a pid for a given - * comm, or NULL if none found. As there may be more than one pid for - * a given comm, the result of this call can be passed back into - * a recurring call in the @next parameter, and then it will find the - * next pid. - * Also, it does a linear search, so it may be slow. - */ -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, - struct tep_cmdline *next) -{ - struct tep_cmdline *cmdline; - - /* - * If the cmdlines have not been converted yet, then use - * the list. - */ - if (!tep->cmdlines) - return pid_from_cmdlist(tep, comm, next); - - if (next) { - /* - * The next pointer could have been still from - * a previous call before cmdlines were created - */ - if (next < tep->cmdlines || - next >= tep->cmdlines + tep->cmdline_count) - next = NULL; - else - cmdline = next++; - } - - if (!next) - cmdline = tep->cmdlines; - - while (cmdline < tep->cmdlines + tep->cmdline_count) { - if (strcmp(cmdline->comm, comm) == 0) - return cmdline; - cmdline++; - } - return NULL; -} - -/** - * tep_cmdline_pid - return the pid associated to a given cmdline - * @tep: a handle to the trace event parser context - * @cmdline: The cmdline structure to get the pid from - * - * Returns the pid for a give cmdline. If @cmdline is NULL, then - * -1 is returned. - */ -int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline) -{ - struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; - - if (!cmdline) - return -1; - - /* - * If cmdlines have not been created yet, or cmdline is - * not part of the array, then treat it as a cmdlist instead. - */ - if (!tep->cmdlines || - cmdline < tep->cmdlines || - cmdline >= tep->cmdlines + tep->cmdline_count) - return cmdlist->pid; - - return cmdline->pid; -} - -/* - * This parses the raw @data using the given @event information and - * writes the print format into the trace_seq. - */ -static void print_event_info(struct trace_seq *s, char *format, bool raw, - struct tep_event *event, struct tep_record *record) -{ - int print_pretty = 1; - - if (raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) - tep_print_fields(s, record->data, record->size, event); - else { - - if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE)) - print_pretty = event->handler(s, record, event, - event->context); - - if (print_pretty) - pretty_print(s, record->data, record->size, event); - } - - trace_seq_terminate(s); -} - -/** - * tep_find_event_by_record - return the event from a given record - * @tep: a handle to the trace event parser context - * @record: The record to get the event from - * - * Returns the associated event for a given record, or NULL if non is - * is found. - */ -struct tep_event * -tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record) -{ - int type; - - if (record->size < 0) { - do_warning("ug! negative record size %d", record->size); - return NULL; - } - - type = trace_parse_common_type(tep, record->data); - - return tep_find_event(tep, type); -} - -/* - * Writes the timestamp of the record into @s. Time divisor and precision can be - * specified as part of printf @format string. Example: - * "%3.1000d" - divide the time by 1000 and print the first 3 digits - * before the dot. Thus, the timestamp "123456000" will be printed as - * "123.456" - */ -static void print_event_time(struct tep_handle *tep, struct trace_seq *s, - char *format, struct tep_event *event, - struct tep_record *record) -{ - unsigned long long time; - char *divstr; - int prec = 0, pr; - int div = 0; - int p10 = 1; - - if (isdigit(*(format + 1))) - prec = atoi(format + 1); - divstr = strchr(format, '.'); - if (divstr && isdigit(*(divstr + 1))) - div = atoi(divstr + 1); - time = record->ts; - if (div) { - time += div / 2; - time /= div; - } - pr = prec; - while (pr--) - p10 *= 10; - - if (p10 > 1 && p10 < time) - trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); - else - trace_seq_printf(s, "%12llu", time); -} - -struct print_event_type { - enum { - EVENT_TYPE_INT = 1, - EVENT_TYPE_STRING, - EVENT_TYPE_UNKNOWN, - } type; - char format[32]; -}; - -static void print_string(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, struct tep_event *event, - const char *arg, struct print_event_type *type) -{ - const char *comm; - int pid; - - if (strncmp(arg, TEP_PRINT_LATENCY, strlen(TEP_PRINT_LATENCY)) == 0) { - data_latency_format(tep, s, type->format, record); - } else if (strncmp(arg, TEP_PRINT_COMM, strlen(TEP_PRINT_COMM)) == 0) { - pid = parse_common_pid(tep, record->data); - comm = find_cmdline(tep, pid); - trace_seq_printf(s, type->format, comm); - } else if (strncmp(arg, TEP_PRINT_INFO_RAW, strlen(TEP_PRINT_INFO_RAW)) == 0) { - print_event_info(s, type->format, true, event, record); - } else if (strncmp(arg, TEP_PRINT_INFO, strlen(TEP_PRINT_INFO)) == 0) { - print_event_info(s, type->format, false, event, record); - } else if (strncmp(arg, TEP_PRINT_NAME, strlen(TEP_PRINT_NAME)) == 0) { - trace_seq_printf(s, type->format, event->name); - } else { - trace_seq_printf(s, "[UNKNOWN TEP TYPE %s]", arg); - } - -} - -static void print_int(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, struct tep_event *event, - int arg, struct print_event_type *type) -{ - int param; - - switch (arg) { - case TEP_PRINT_CPU: - param = record->cpu; - break; - case TEP_PRINT_PID: - param = parse_common_pid(tep, record->data); - break; - case TEP_PRINT_TIME: - return print_event_time(tep, s, type->format, event, record); - default: - return; - } - trace_seq_printf(s, type->format, param); -} - -static int tep_print_event_param_type(char *format, - struct print_event_type *type) -{ - char *str = format + 1; - int i = 1; - - type->type = EVENT_TYPE_UNKNOWN; - while (*str) { - switch (*str) { - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - type->type = EVENT_TYPE_INT; - break; - case 's': - type->type = EVENT_TYPE_STRING; - break; - } - str++; - i++; - if (type->type != EVENT_TYPE_UNKNOWN) - break; - } - memset(type->format, 0, 32); - memcpy(type->format, format, i < 32 ? i : 31); - return i; -} - -/** - * tep_print_event - Write various event information - * @tep: a handle to the trace event parser context - * @s: the trace_seq to write to - * @record: The record to get the event from - * @format: a printf format string. Supported event fileds: - * TEP_PRINT_PID, "%d" - event PID - * TEP_PRINT_CPU, "%d" - event CPU - * TEP_PRINT_COMM, "%s" - event command string - * TEP_PRINT_NAME, "%s" - event name - * TEP_PRINT_LATENCY, "%s" - event latency - * TEP_PRINT_TIME, %d - event time stamp. A divisor and precision - * can be specified as part of this format string: - * "%precision.divisord". Example: - * "%3.1000d" - divide the time by 1000 and print the first - * 3 digits before the dot. Thus, the time stamp - * "123456000" will be printed as "123.456" - * TEP_PRINT_INFO, "%s" - event information. If any width is specified in - * the format string, the event information will be printed - * in raw format. - * Writes the specified event information into @s. - */ -void tep_print_event(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, const char *fmt, ...) -{ - struct print_event_type type; - char *format = strdup(fmt); - char *current = format; - char *str = format; - int offset; - va_list args; - struct tep_event *event; - - if (!format) - return; - - event = tep_find_event_by_record(tep, record); - va_start(args, fmt); - while (*current) { - current = strchr(str, '%'); - if (!current) { - trace_seq_puts(s, str); - break; - } - memset(&type, 0, sizeof(type)); - offset = tep_print_event_param_type(current, &type); - *current = '\0'; - trace_seq_puts(s, str); - current += offset; - switch (type.type) { - case EVENT_TYPE_STRING: - print_string(tep, s, record, event, - va_arg(args, char*), &type); - break; - case EVENT_TYPE_INT: - print_int(tep, s, record, event, - va_arg(args, int), &type); - break; - case EVENT_TYPE_UNKNOWN: - default: - trace_seq_printf(s, "[UNKNOWN TYPE]"); - break; - } - str = current; - - } - va_end(args); - free(format); -} - -static int events_id_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - - if ((*ea)->id < (*eb)->id) - return -1; - - if ((*ea)->id > (*eb)->id) - return 1; - - return 0; -} - -static int events_name_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - int res; - - res = strcmp((*ea)->name, (*eb)->name); - if (res) - return res; - - res = strcmp((*ea)->system, (*eb)->system); - if (res) - return res; - - return events_id_cmp(a, b); -} - -static int events_system_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - int res; - - res = strcmp((*ea)->system, (*eb)->system); - if (res) - return res; - - res = strcmp((*ea)->name, (*eb)->name); - if (res) - return res; - - return events_id_cmp(a, b); -} - -static struct tep_event **list_events_copy(struct tep_handle *tep) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = malloc(sizeof(*events) * (tep->nr_events + 1)); - if (!events) - return NULL; - - memcpy(events, tep->events, sizeof(*events) * tep->nr_events); - events[tep->nr_events] = NULL; - return events; -} - -static void list_events_sort(struct tep_event **events, int nr_events, - enum tep_event_sort_type sort_type) -{ - int (*sort)(const void *a, const void *b); - - switch (sort_type) { - case TEP_EVENT_SORT_ID: - sort = events_id_cmp; - break; - case TEP_EVENT_SORT_NAME: - sort = events_name_cmp; - break; - case TEP_EVENT_SORT_SYSTEM: - sort = events_system_cmp; - break; - default: - sort = NULL; - } - - if (sort) - qsort(events, nr_events, sizeof(*events), sort); -} - -/** - * tep_list_events - Get events, sorted by given criteria. - * @tep: a handle to the tep context - * @sort_type: desired sort order of the events in the array - * - * Returns an array of pointers to all events, sorted by the given - * @sort_type criteria. The last element of the array is NULL. The returned - * memory must not be freed, it is managed by the library. - * The function is not thread safe. - */ -struct tep_event **tep_list_events(struct tep_handle *tep, - enum tep_event_sort_type sort_type) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = tep->sort_events; - if (events && tep->last_type == sort_type) - return events; - - if (!events) { - events = list_events_copy(tep); - if (!events) - return NULL; - - tep->sort_events = events; - - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) { - tep->last_type = sort_type; - return events; - } - } - - list_events_sort(events, tep->nr_events, sort_type); - tep->last_type = sort_type; - - return events; -} - - -/** - * tep_list_events_copy - Thread safe version of tep_list_events() - * @tep: a handle to the tep context - * @sort_type: desired sort order of the events in the array - * - * Returns an array of pointers to all events, sorted by the given - * @sort_type criteria. The last element of the array is NULL. The returned - * array is newly allocated inside the function and must be freed by the caller - */ -struct tep_event **tep_list_events_copy(struct tep_handle *tep, - enum tep_event_sort_type sort_type) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = list_events_copy(tep); - if (!events) - return NULL; - - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) - return events; - - list_events_sort(events, tep->nr_events, sort_type); - - return events; -} - -static struct tep_format_field ** -get_event_fields(const char *type, const char *name, - int count, struct tep_format_field *list) -{ - struct tep_format_field **fields; - struct tep_format_field *field; - int i = 0; - - fields = malloc(sizeof(*fields) * (count + 1)); - if (!fields) - return NULL; - - for (field = list; field; field = field->next) { - fields[i++] = field; - if (i == count + 1) { - do_warning("event %s has more %s fields than specified", - name, type); - i--; - break; - } - } - - if (i != count) - do_warning("event %s has less %s fields than specified", - name, type); - - fields[i] = NULL; - - return fields; -} - -/** - * tep_event_common_fields - return a list of common fields for an event - * @event: the event to return the common fields of. - * - * Returns an allocated array of fields. The last item in the array is NULL. - * The array must be freed with free(). - */ -struct tep_format_field **tep_event_common_fields(struct tep_event *event) -{ - return get_event_fields("common", event->name, - event->format.nr_common, - event->format.common_fields); -} - -/** - * tep_event_fields - return a list of event specific fields for an event - * @event: the event to return the fields of. - * - * Returns an allocated array of fields. The last item in the array is NULL. - * The array must be freed with free(). - */ -struct tep_format_field **tep_event_fields(struct tep_event *event) -{ - return get_event_fields("event", event->name, - event->format.nr_fields, - event->format.fields); -} - -static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field) -{ - trace_seq_printf(s, "{ %s, %s }", field->value, field->str); - if (field->next) { - trace_seq_puts(s, ", "); - print_fields(s, field->next); - } -} - -/* for debugging */ -static void print_args(struct tep_print_arg *args) -{ - int print_paren = 1; - struct trace_seq s; - - switch (args->type) { - case TEP_PRINT_NULL: - printf("null"); - break; - case TEP_PRINT_ATOM: - printf("%s", args->atom.atom); - break; - case TEP_PRINT_FIELD: - printf("REC->%s", args->field.name); - break; - case TEP_PRINT_FLAGS: - printf("__print_flags("); - print_args(args->flags.field); - printf(", %s, ", args->flags.delim); - trace_seq_init(&s); - print_fields(&s, args->flags.flags); - trace_seq_do_printf(&s); - trace_seq_destroy(&s); - printf(")"); - break; - case TEP_PRINT_SYMBOL: - printf("__print_symbolic("); - print_args(args->symbol.field); - printf(", "); - trace_seq_init(&s); - print_fields(&s, args->symbol.symbols); - trace_seq_do_printf(&s); - trace_seq_destroy(&s); - printf(")"); - break; - case TEP_PRINT_HEX: - printf("__print_hex("); - print_args(args->hex.field); - printf(", "); - print_args(args->hex.size); - printf(")"); - break; - case TEP_PRINT_HEX_STR: - printf("__print_hex_str("); - print_args(args->hex.field); - printf(", "); - print_args(args->hex.size); - printf(")"); - break; - case TEP_PRINT_INT_ARRAY: - printf("__print_array("); - print_args(args->int_array.field); - printf(", "); - print_args(args->int_array.count); - printf(", "); - print_args(args->int_array.el_size); - printf(")"); - break; - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - printf("__get_str(%s)", args->string.string); - break; - case TEP_PRINT_BITMASK: - printf("__get_bitmask(%s)", args->bitmask.bitmask); - break; - case TEP_PRINT_TYPE: - printf("(%s)", args->typecast.type); - print_args(args->typecast.item); - break; - case TEP_PRINT_OP: - if (strcmp(args->op.op, ":") == 0) - print_paren = 0; - if (print_paren) - printf("("); - print_args(args->op.left); - printf(" %s ", args->op.op); - print_args(args->op.right); - if (print_paren) - printf(")"); - break; - default: - /* we should warn... */ - return; - } - if (args->next) { - printf("\n"); - print_args(args->next); - } -} - -static void parse_header_field(const char *field, - int *offset, int *size, int mandatory) -{ - unsigned long long save_input_buf_ptr; - unsigned long long save_input_buf_siz; - char *token; - int type; - - save_input_buf_ptr = input_buf_ptr; - save_input_buf_siz = input_buf_siz; - - if (read_expected(TEP_EVENT_ITEM, "field") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - - /* type */ - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - free_token(token); - - /* - * If this is not a mandatory field, then test it first. - */ - if (mandatory) { - if (read_expected(TEP_EVENT_ITEM, field) < 0) - return; - } else { - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - if (strcmp(token, field) != 0) - goto discard; - free_token(token); - } - - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - if (read_expected(TEP_EVENT_ITEM, "offset") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - *offset = atoi(token); - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - if (read_expected(TEP_EVENT_ITEM, "size") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - *size = atoi(token); - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - type = read_token(&token); - if (type != TEP_EVENT_NEWLINE) { - /* newer versions of the kernel have a "signed" type */ - if (type != TEP_EVENT_ITEM) - goto fail; - - if (strcmp(token, "signed") != 0) - goto fail; - - free_token(token); - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - } - fail: - free_token(token); - return; - - discard: - input_buf_ptr = save_input_buf_ptr; - input_buf_siz = save_input_buf_siz; - *offset = 0; - *size = 0; - free_token(token); -} - -/** - * tep_parse_header_page - parse the data stored in the header page - * @tep: a handle to the trace event parser context - * @buf: the buffer storing the header page format string - * @size: the size of @buf - * @long_size: the long size to use if there is no header - * - * This parses the header page format for information on the - * ring buffer used. The @buf should be copied from - * - * /sys/kernel/debug/tracing/events/header_page - */ -int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, - int long_size) -{ - int ignore; - - if (!size) { - /* - * Old kernels did not have header page info. - * Sorry but we just use what we find here in user space. - */ - tep->header_page_ts_size = sizeof(long long); - tep->header_page_size_size = long_size; - tep->header_page_data_offset = sizeof(long long) + long_size; - tep->old_format = 1; - return -1; - } - init_input_buf(buf, size); - - parse_header_field("timestamp", &tep->header_page_ts_offset, - &tep->header_page_ts_size, 1); - parse_header_field("commit", &tep->header_page_size_offset, - &tep->header_page_size_size, 1); - parse_header_field("overwrite", &tep->header_page_overwrite, - &ignore, 0); - parse_header_field("data", &tep->header_page_data_offset, - &tep->header_page_data_size, 1); - - return 0; -} - -static int event_matches(struct tep_event *event, - int id, const char *sys_name, - const char *event_name) -{ - if (id >= 0 && id != event->id) - return 0; - - if (event_name && (strcmp(event_name, event->name) != 0)) - return 0; - - if (sys_name && (strcmp(sys_name, event->system) != 0)) - return 0; - - return 1; -} - -static void free_handler(struct event_handler *handle) -{ - free((void *)handle->sys_name); - free((void *)handle->event_name); - free(handle); -} - -static int find_event_handle(struct tep_handle *tep, struct tep_event *event) -{ - struct event_handler *handle, **next; - - for (next = &tep->handlers; *next; - next = &(*next)->next) { - handle = *next; - if (event_matches(event, handle->id, - handle->sys_name, - handle->event_name)) - break; - } - - if (!(*next)) - return 0; - - pr_stat("overriding event (%d) %s:%s with new print handler", - event->id, event->system, event->name); - - event->handler = handle->func; - event->context = handle->context; - - *next = handle->next; - free_handler(handle); - - return 1; -} - -/** - * parse_format - parse the event format - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -static enum tep_errno parse_format(struct tep_event **eventp, - struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) -{ - struct tep_event *event; - int ret; - - init_input_buf(buf, size); - - *eventp = event = alloc_event(); - if (!event) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - event->name = event_read_name(); - if (!event->name) { - /* Bad event? */ - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_alloc_failed; - } - - if (strcmp(sys, "ftrace") == 0) { - event->flags |= TEP_EVENT_FL_ISFTRACE; - - if (strcmp(event->name, "bprint") == 0) - event->flags |= TEP_EVENT_FL_ISBPRINT; - } - - event->id = event_read_id(); - if (event->id < 0) { - ret = TEP_ERRNO__READ_ID_FAILED; - /* - * This isn't an allocation error actually. - * But as the ID is critical, just bail out. - */ - goto event_alloc_failed; - } - - event->system = strdup(sys); - if (!event->system) { - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_alloc_failed; - } - - /* Add tep to event so that it can be referenced */ - event->tep = tep; - - ret = event_read_format(event); - if (ret < 0) { - ret = TEP_ERRNO__READ_FORMAT_FAILED; - goto event_parse_failed; - } - - /* - * If the event has an override, don't print warnings if the event - * print format fails to parse. - */ - if (tep && find_event_handle(tep, event)) - show_warning = 0; - - ret = event_read_print(event); - show_warning = 1; - - if (ret < 0) { - ret = TEP_ERRNO__READ_PRINT_FAILED; - goto event_parse_failed; - } - - if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) { - struct tep_format_field *field; - struct tep_print_arg *arg, **list; - - /* old ftrace had no args */ - list = &event->print_fmt.args; - for (field = event->format.fields; field; field = field->next) { - arg = alloc_arg(); - if (!arg) { - event->flags |= TEP_EVENT_FL_FAILED; - return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; - } - arg->type = TEP_PRINT_FIELD; - arg->field.name = strdup(field->name); - if (!arg->field.name) { - event->flags |= TEP_EVENT_FL_FAILED; - free_arg(arg); - return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; - } - arg->field.field = field; - *list = arg; - list = &arg->next; - } - } - - if (!(event->flags & TEP_EVENT_FL_ISBPRINT)) - event->print_fmt.print_cache = parse_args(event, - event->print_fmt.format, - event->print_fmt.args); - - return 0; - - event_parse_failed: - event->flags |= TEP_EVENT_FL_FAILED; - return ret; - - event_alloc_failed: - free(event->system); - free(event->name); - free(event); - *eventp = NULL; - return ret; -} - -static enum tep_errno -__parse_event(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, unsigned long size, - const char *sys) -{ - int ret = parse_format(eventp, tep, buf, size, sys); - struct tep_event *event = *eventp; - - if (event == NULL) - return ret; - - if (tep && add_event(tep, event)) { - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_add_failed; - } - -#define PRINT_ARGS 0 - if (PRINT_ARGS && event->print_fmt.args) - print_args(event->print_fmt.args); - - return 0; - -event_add_failed: - free_tep_event(event); - return ret; -} - -/** - * tep_parse_format - parse the event format - * @tep: a handle to the trace event parser context - * @eventp: returned format - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -enum tep_errno tep_parse_format(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, - unsigned long size, const char *sys) -{ - return __parse_event(tep, eventp, buf, size, sys); -} - -/** - * tep_parse_event - parse the event format - * @tep: a handle to the trace event parser context - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) -{ - struct tep_event *event = NULL; - return __parse_event(tep, &event, buf, size, sys); -} - -int get_field_val(struct trace_seq *s, struct tep_format_field *field, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - if (!field) { - if (err) - trace_seq_printf(s, "", name); - return -1; - } - - if (tep_read_number_field(field, record->data, val)) { - if (err) - trace_seq_printf(s, " %s=INVALID", name); - return -1; - } - - return 0; -} - -/** - * tep_get_field_raw - return the raw pointer into the data field - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @len: place to store the field length. - * @err: print default error if failed. - * - * Returns a pointer into record->data of the field and places - * the length of the field in @len. - * - * On failure, it returns NULL. - */ -void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - int *len, int err) -{ - struct tep_format_field *field; - void *data = record->data; - unsigned offset; - int dummy; - - if (!event) - return NULL; - - field = tep_find_field(event, name); - - if (!field) { - if (err) - trace_seq_printf(s, "", name); - return NULL; - } - - /* Allow @len to be NULL */ - if (!len) - len = &dummy; - - offset = field->offset; - if (field->flags & TEP_FIELD_IS_DYNAMIC) { - offset = tep_read_number(event->tep, - data + offset, field->size); - *len = offset >> 16; - offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; - } else - *len = field->size; - - return data + offset; -} - -/** - * tep_get_field_val - find a field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_get_common_field_val - find a common field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_common_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_get_any_field_val - find a any field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_any_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_print_num_field - print a field and a format - * @s: The seq to print to - * @fmt: The printf format to print the field with. - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @err: print default error if failed. - * - * Returns positive value on success, negative in case of an error, - * or 0 if buffer is full. - */ -int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err) -{ - struct tep_format_field *field = tep_find_field(event, name); - unsigned long long val; - - if (!field) - goto failed; - - if (tep_read_number_field(field, record->data, &val)) - goto failed; - - return trace_seq_printf(s, fmt, val); - - failed: - if (err) - trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); - return -1; -} - -/** - * tep_print_func_field - print a field and a format for function pointers - * @s: The seq to print to - * @fmt: The printf format to print the field with. - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @err: print default error if failed. - * - * Returns positive value on success, negative in case of an error, - * or 0 if buffer is full. - */ -int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err) -{ - struct tep_format_field *field = tep_find_field(event, name); - struct tep_handle *tep = event->tep; - unsigned long long val; - struct func_map *func; - char tmp[128]; - - if (!field) - goto failed; - - if (tep_read_number_field(field, record->data, &val)) - goto failed; - - func = find_func(tep, val); - - if (func) - snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); - else - sprintf(tmp, "0x%08llx", val); - - return trace_seq_printf(s, fmt, tmp); - - failed: - if (err) - trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); - return -1; -} - -static void free_func_handle(struct tep_function_handler *func) -{ - struct func_params *params; - - free(func->name); - - while (func->params) { - params = func->params; - func->params = params->next; - free(params); - } - - free(func); -} - -/** - * tep_register_print_function - register a helper function - * @tep: a handle to the trace event parser context - * @func: the function to process the helper function - * @ret_type: the return type of the helper function - * @name: the name of the helper function - * @parameters: A list of enum tep_func_arg_type - * - * Some events may have helper functions in the print format arguments. - * This allows a plugin to dynamically create a way to process one - * of these functions. - * - * The @parameters is a variable list of tep_func_arg_type enums that - * must end with TEP_FUNC_ARG_VOID. - */ -int tep_register_print_function(struct tep_handle *tep, - tep_func_handler func, - enum tep_func_arg_type ret_type, - char *name, ...) -{ - struct tep_function_handler *func_handle; - struct func_params **next_param; - struct func_params *param; - enum tep_func_arg_type type; - va_list ap; - int ret; - - func_handle = find_func_handler(tep, name); - if (func_handle) { - /* - * This is most like caused by the users own - * plugins updating the function. This overrides the - * system defaults. - */ - pr_stat("override of function helper '%s'", name); - remove_func_handler(tep, name); - } - - func_handle = calloc(1, sizeof(*func_handle)); - if (!func_handle) { - do_warning("Failed to allocate function handler"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - func_handle->ret_type = ret_type; - func_handle->name = strdup(name); - func_handle->func = func; - if (!func_handle->name) { - do_warning("Failed to allocate function name"); - free(func_handle); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - next_param = &(func_handle->params); - va_start(ap, name); - for (;;) { - type = va_arg(ap, enum tep_func_arg_type); - if (type == TEP_FUNC_ARG_VOID) - break; - - if (type >= TEP_FUNC_ARG_MAX_TYPES) { - do_warning("Invalid argument type %d", type); - ret = TEP_ERRNO__INVALID_ARG_TYPE; - goto out_free; - } - - param = malloc(sizeof(*param)); - if (!param) { - do_warning("Failed to allocate function param"); - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto out_free; - } - param->type = type; - param->next = NULL; - - *next_param = param; - next_param = &(param->next); - - func_handle->nr_args++; - } - va_end(ap); - - func_handle->next = tep->func_handlers; - tep->func_handlers = func_handle; - - return 0; - out_free: - va_end(ap); - free_func_handle(func_handle); - return ret; -} - -/** - * tep_unregister_print_function - unregister a helper function - * @tep: a handle to the trace event parser context - * @func: the function to process the helper function - * @name: the name of the helper function - * - * This function removes existing print handler for function @name. - * - * Returns 0 if the handler was removed successully, -1 otherwise. - */ -int tep_unregister_print_function(struct tep_handle *tep, - tep_func_handler func, char *name) -{ - struct tep_function_handler *func_handle; - - func_handle = find_func_handler(tep, name); - if (func_handle && func_handle->func == func) { - remove_func_handler(tep, name); - return 0; - } - return -1; -} - -static struct tep_event *search_event(struct tep_handle *tep, int id, - const char *sys_name, - const char *event_name) -{ - struct tep_event *event; - - if (id >= 0) { - /* search by id */ - event = tep_find_event(tep, id); - if (!event) - return NULL; - if (event_name && (strcmp(event_name, event->name) != 0)) - return NULL; - if (sys_name && (strcmp(sys_name, event->system) != 0)) - return NULL; - } else { - event = tep_find_event_by_name(tep, sys_name, event_name); - if (!event) - return NULL; - } - return event; -} - -/** - * tep_register_event_handler - register a way to parse an event - * @tep: a handle to the trace event parser context - * @id: the id of the event to register - * @sys_name: the system name the event belongs to - * @event_name: the name of the event - * @func: the function to call to parse the event information - * @context: the data to be passed to @func - * - * This function allows a developer to override the parsing of - * a given event. If for some reason the default print format - * is not sufficient, this function will register a function - * for an event to be used to parse the data instead. - * - * If @id is >= 0, then it is used to find the event. - * else @sys_name and @event_name are used. - * - * Returns: - * TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten - * TEP_REGISTER_SUCCESS if a new handler is registered successfully - * negative TEP_ERRNO_... in case of an error - * - */ -int tep_register_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - struct tep_event *event; - struct event_handler *handle; - - event = search_event(tep, id, sys_name, event_name); - if (event == NULL) - goto not_found; - - pr_stat("overriding event (%d) %s:%s with new print handler", - event->id, event->system, event->name); - - event->handler = func; - event->context = context; - return TEP_REGISTER_SUCCESS_OVERWRITE; - - not_found: - /* Save for later use. */ - handle = calloc(1, sizeof(*handle)); - if (!handle) { - do_warning("Failed to allocate event handler"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - handle->id = id; - if (event_name) - handle->event_name = strdup(event_name); - if (sys_name) - handle->sys_name = strdup(sys_name); - - if ((event_name && !handle->event_name) || - (sys_name && !handle->sys_name)) { - do_warning("Failed to allocate event/sys name"); - free((void *)handle->event_name); - free((void *)handle->sys_name); - free(handle); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - handle->func = func; - handle->next = tep->handlers; - tep->handlers = handle; - handle->context = context; - - return TEP_REGISTER_SUCCESS; -} - -static int handle_matches(struct event_handler *handler, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - if (id >= 0 && id != handler->id) - return 0; - - if (event_name && (strcmp(event_name, handler->event_name) != 0)) - return 0; - - if (sys_name && (strcmp(sys_name, handler->sys_name) != 0)) - return 0; - - if (func != handler->func || context != handler->context) - return 0; - - return 1; -} - -/** - * tep_unregister_event_handler - unregister an existing event handler - * @tep: a handle to the trace event parser context - * @id: the id of the event to unregister - * @sys_name: the system name the handler belongs to - * @event_name: the name of the event handler - * @func: the function to call to parse the event information - * @context: the data to be passed to @func - * - * This function removes existing event handler (parser). - * - * If @id is >= 0, then it is used to find the event. - * else @sys_name and @event_name are used. - * - * Returns 0 if handler was removed successfully, -1 if event was not found. - */ -int tep_unregister_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - struct tep_event *event; - struct event_handler *handle; - struct event_handler **next; - - event = search_event(tep, id, sys_name, event_name); - if (event == NULL) - goto not_found; - - if (event->handler == func && event->context == context) { - pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.", - event->id, event->system, event->name); - - event->handler = NULL; - event->context = NULL; - return 0; - } - -not_found: - for (next = &tep->handlers; *next; next = &(*next)->next) { - handle = *next; - if (handle_matches(handle, id, sys_name, event_name, - func, context)) - break; - } - - if (!(*next)) - return -1; - - *next = handle->next; - free_handler(handle); - - return 0; -} - -/** - * tep_alloc - create a tep handle - */ -struct tep_handle *tep_alloc(void) -{ - struct tep_handle *tep = calloc(1, sizeof(*tep)); - - if (tep) { - tep->ref_count = 1; - tep->host_bigendian = tep_is_bigendian(); - } - - return tep; -} - -void tep_ref(struct tep_handle *tep) -{ - tep->ref_count++; -} - -int tep_get_ref(struct tep_handle *tep) -{ - if (tep) - return tep->ref_count; - return 0; -} - -__hidden void free_tep_format_field(struct tep_format_field *field) -{ - free(field->type); - if (field->alias != field->name) - free(field->alias); - free(field->name); - free(field); -} - -static void free_format_fields(struct tep_format_field *field) -{ - struct tep_format_field *next; - - while (field) { - next = field->next; - free_tep_format_field(field); - field = next; - } -} - -static void free_formats(struct tep_format *format) -{ - free_format_fields(format->common_fields); - free_format_fields(format->fields); -} - -__hidden void free_tep_event(struct tep_event *event) -{ - free(event->name); - free(event->system); - - free_formats(&event->format); - - free(event->print_fmt.format); - free_args(event->print_fmt.args); - free_parse_args(event->print_fmt.print_cache); - free(event); -} - -/** - * tep_free - free a tep handle - * @tep: the tep handle to free - */ -void tep_free(struct tep_handle *tep) -{ - struct cmdline_list *cmdlist, *cmdnext; - struct func_list *funclist, *funcnext; - struct printk_list *printklist, *printknext; - struct tep_function_handler *func_handler; - struct event_handler *handle; - int i; - - if (!tep) - return; - - cmdlist = tep->cmdlist; - funclist = tep->funclist; - printklist = tep->printklist; - - tep->ref_count--; - if (tep->ref_count) - return; - - if (tep->cmdlines) { - for (i = 0; i < tep->cmdline_count; i++) - free(tep->cmdlines[i].comm); - free(tep->cmdlines); - } - - while (cmdlist) { - cmdnext = cmdlist->next; - free(cmdlist->comm); - free(cmdlist); - cmdlist = cmdnext; - } - - if (tep->func_map) { - for (i = 0; i < (int)tep->func_count; i++) { - free(tep->func_map[i].func); - free(tep->func_map[i].mod); - } - free(tep->func_map); - } - - while (funclist) { - funcnext = funclist->next; - free(funclist->func); - free(funclist->mod); - free(funclist); - funclist = funcnext; - } - - while (tep->func_handlers) { - func_handler = tep->func_handlers; - tep->func_handlers = func_handler->next; - free_func_handle(func_handler); - } - - if (tep->printk_map) { - for (i = 0; i < (int)tep->printk_count; i++) - free(tep->printk_map[i].printk); - free(tep->printk_map); - } - - while (printklist) { - printknext = printklist->next; - free(printklist->printk); - free(printklist); - printklist = printknext; - } - - for (i = 0; i < tep->nr_events; i++) - free_tep_event(tep->events[i]); - - while (tep->handlers) { - handle = tep->handlers; - tep->handlers = handle->next; - free_handler(handle); - } - - free(tep->events); - free(tep->sort_events); - free(tep->func_resolver); - free_tep_plugin_paths(tep); - - free(tep); -} - -void tep_unref(struct tep_handle *tep) -{ - tep_free(tep); -} diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h deleted file mode 100644 index 41d4f9f6a843..000000000000 --- a/tools/lib/traceevent/event-parse.h +++ /dev/null @@ -1,750 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ -#ifndef _PARSE_EVENTS_H -#define _PARSE_EVENTS_H - -#include -#include -#include -#include -#include - -#include "trace-seq.h" - -#ifndef __maybe_unused -#define __maybe_unused __attribute__((unused)) -#endif - -#ifndef DEBUG_RECORD -#define DEBUG_RECORD 0 -#endif - -struct tep_record { - unsigned long long ts; - unsigned long long offset; - long long missed_events; /* buffer dropped events before */ - int record_size; /* size of binary record */ - int size; /* size of data */ - void *data; - int cpu; - int ref_count; - int locked; /* Do not free, even if ref_count is zero */ - void *priv; -#if DEBUG_RECORD - struct tep_record *prev; - struct tep_record *next; - long alloc_addr; -#endif -}; - -/* ----------------------- tep ----------------------- */ - -struct tep_handle; -struct tep_event; - -typedef int (*tep_event_handler_func)(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, - void *context); - -typedef int (*tep_plugin_load_func)(struct tep_handle *tep); -typedef int (*tep_plugin_unload_func)(struct tep_handle *tep); - -struct tep_plugin_option { - struct tep_plugin_option *next; - void *handle; - char *file; - char *name; - char *plugin_alias; - char *description; - const char *value; - void *priv; - int set; -}; - -/* - * Plugin hooks that can be called: - * - * TEP_PLUGIN_LOADER: (required) - * The function name to initialized the plugin. - * - * int TEP_PLUGIN_LOADER(struct tep_handle *tep) - * - * TEP_PLUGIN_UNLOADER: (optional) - * The function called just before unloading - * - * int TEP_PLUGIN_UNLOADER(struct tep_handle *tep) - * - * TEP_PLUGIN_OPTIONS: (optional) - * Plugin options that can be set before loading - * - * struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = { - * { - * .name = "option-name", - * .plugin_alias = "override-file-name", (optional) - * .description = "description of option to show users", - * }, - * { - * .name = NULL, - * }, - * }; - * - * Array must end with .name = NULL; - * - * - * .plugin_alias is used to give a shorter name to access - * the vairable. Useful if a plugin handles more than one event. - * - * If .value is not set, then it is considered a boolean and only - * .set will be processed. If .value is defined, then it is considered - * a string option and .set will be ignored. - * - * TEP_PLUGIN_ALIAS: (optional) - * The name to use for finding options (uses filename if not defined) - */ -#define TEP_PLUGIN_LOADER tep_plugin_loader -#define TEP_PLUGIN_UNLOADER tep_plugin_unloader -#define TEP_PLUGIN_OPTIONS tep_plugin_options -#define TEP_PLUGIN_ALIAS tep_plugin_alias -#define _MAKE_STR(x) #x -#define MAKE_STR(x) _MAKE_STR(x) -#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER) -#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER) -#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS) -#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS) - -enum tep_format_flags { - TEP_FIELD_IS_ARRAY = 1, - TEP_FIELD_IS_POINTER = 2, - TEP_FIELD_IS_SIGNED = 4, - TEP_FIELD_IS_STRING = 8, - TEP_FIELD_IS_DYNAMIC = 16, - TEP_FIELD_IS_LONG = 32, - TEP_FIELD_IS_FLAG = 64, - TEP_FIELD_IS_SYMBOLIC = 128, - TEP_FIELD_IS_RELATIVE = 256, -}; - -struct tep_format_field { - struct tep_format_field *next; - struct tep_event *event; - char *type; - char *name; - char *alias; - int offset; - int size; - unsigned int arraylen; - unsigned int elementsize; - unsigned long flags; -}; - -struct tep_format { - int nr_common; - int nr_fields; - struct tep_format_field *common_fields; - struct tep_format_field *fields; -}; - -struct tep_print_arg_atom { - char *atom; -}; - -struct tep_print_arg_string { - char *string; - struct tep_format_field *field; -}; - -struct tep_print_arg_bitmask { - char *bitmask; - struct tep_format_field *field; -}; - -struct tep_print_arg_field { - char *name; - struct tep_format_field *field; -}; - -struct tep_print_flag_sym { - struct tep_print_flag_sym *next; - char *value; - char *str; -}; - -struct tep_print_arg_typecast { - char *type; - struct tep_print_arg *item; -}; - -struct tep_print_arg_flags { - struct tep_print_arg *field; - char *delim; - struct tep_print_flag_sym *flags; -}; - -struct tep_print_arg_symbol { - struct tep_print_arg *field; - struct tep_print_flag_sym *symbols; -}; - -struct tep_print_arg_hex { - struct tep_print_arg *field; - struct tep_print_arg *size; -}; - -struct tep_print_arg_int_array { - struct tep_print_arg *field; - struct tep_print_arg *count; - struct tep_print_arg *el_size; -}; - -struct tep_print_arg_dynarray { - struct tep_format_field *field; - struct tep_print_arg *index; -}; - -struct tep_print_arg; - -struct tep_print_arg_op { - char *op; - int prio; - struct tep_print_arg *left; - struct tep_print_arg *right; -}; - -struct tep_function_handler; - -struct tep_print_arg_func { - struct tep_function_handler *func; - struct tep_print_arg *args; -}; - -enum tep_print_arg_type { - TEP_PRINT_NULL, - TEP_PRINT_ATOM, - TEP_PRINT_FIELD, - TEP_PRINT_FLAGS, - TEP_PRINT_SYMBOL, - TEP_PRINT_HEX, - TEP_PRINT_INT_ARRAY, - TEP_PRINT_TYPE, - TEP_PRINT_STRING, - TEP_PRINT_BSTRING, - TEP_PRINT_DYNAMIC_ARRAY, - TEP_PRINT_OP, - TEP_PRINT_FUNC, - TEP_PRINT_BITMASK, - TEP_PRINT_DYNAMIC_ARRAY_LEN, - TEP_PRINT_HEX_STR, -}; - -struct tep_print_arg { - struct tep_print_arg *next; - enum tep_print_arg_type type; - union { - struct tep_print_arg_atom atom; - struct tep_print_arg_field field; - struct tep_print_arg_typecast typecast; - struct tep_print_arg_flags flags; - struct tep_print_arg_symbol symbol; - struct tep_print_arg_hex hex; - struct tep_print_arg_int_array int_array; - struct tep_print_arg_func func; - struct tep_print_arg_string string; - struct tep_print_arg_bitmask bitmask; - struct tep_print_arg_op op; - struct tep_print_arg_dynarray dynarray; - }; -}; - -struct tep_print_parse; - -struct tep_print_fmt { - char *format; - struct tep_print_arg *args; - struct tep_print_parse *print_cache; -}; - -struct tep_event { - struct tep_handle *tep; - char *name; - int id; - int flags; - struct tep_format format; - struct tep_print_fmt print_fmt; - char *system; - tep_event_handler_func handler; - void *context; -}; - -enum { - TEP_EVENT_FL_ISFTRACE = 0x01, - TEP_EVENT_FL_ISPRINT = 0x02, - TEP_EVENT_FL_ISBPRINT = 0x04, - TEP_EVENT_FL_ISFUNCENT = 0x10, - TEP_EVENT_FL_ISFUNCRET = 0x20, - TEP_EVENT_FL_NOHANDLE = 0x40, - TEP_EVENT_FL_PRINTRAW = 0x80, - - TEP_EVENT_FL_FAILED = 0x80000000 -}; - -enum tep_event_sort_type { - TEP_EVENT_SORT_ID, - TEP_EVENT_SORT_NAME, - TEP_EVENT_SORT_SYSTEM, -}; - -enum tep_event_type { - TEP_EVENT_ERROR, - TEP_EVENT_NONE, - TEP_EVENT_SPACE, - TEP_EVENT_NEWLINE, - TEP_EVENT_OP, - TEP_EVENT_DELIM, - TEP_EVENT_ITEM, - TEP_EVENT_DQUOTE, - TEP_EVENT_SQUOTE, -}; - -typedef unsigned long long (*tep_func_handler)(struct trace_seq *s, - unsigned long long *args); - -enum tep_func_arg_type { - TEP_FUNC_ARG_VOID, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_STRING, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_MAX_TYPES -}; - -enum tep_flag { - TEP_NSEC_OUTPUT = 1, /* output in NSECS */ - TEP_DISABLE_SYS_PLUGINS = 1 << 1, - TEP_DISABLE_PLUGINS = 1 << 2, -}; - -#define TEP_ERRORS \ - _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ - _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ - _PE(READ_ID_FAILED, "failed to read event id"), \ - _PE(READ_FORMAT_FAILED, "failed to read event format"), \ - _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ - _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\ - _PE(INVALID_ARG_TYPE, "invalid argument type"), \ - _PE(INVALID_EXP_TYPE, "invalid expression type"), \ - _PE(INVALID_OP_TYPE, "invalid operator type"), \ - _PE(INVALID_EVENT_NAME, "invalid event name"), \ - _PE(EVENT_NOT_FOUND, "no event found"), \ - _PE(SYNTAX_ERROR, "syntax error"), \ - _PE(ILLEGAL_RVALUE, "illegal rvalue"), \ - _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \ - _PE(INVALID_REGEX, "regex did not compute"), \ - _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \ - _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \ - _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \ - _PE(REPARENT_FAILED, "failed to reparent filter OP"), \ - _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \ - _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \ - _PE(ILLEGAL_TOKEN, "illegal token"), \ - _PE(INVALID_PAREN, "open parenthesis cannot come here"), \ - _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \ - _PE(UNKNOWN_TOKEN, "unknown token"), \ - _PE(FILTER_NOT_FOUND, "no filter found"), \ - _PE(NOT_A_NUMBER, "must have number field"), \ - _PE(NO_FILTER, "no filters exists"), \ - _PE(FILTER_MISS, "record does not match to filter") - -#undef _PE -#define _PE(__code, __str) TEP_ERRNO__ ## __code -enum tep_errno { - TEP_ERRNO__SUCCESS = 0, - TEP_ERRNO__FILTER_MATCH = TEP_ERRNO__SUCCESS, - - /* - * Choose an arbitrary negative big number not to clash with standard - * errno since SUS requires the errno has distinct positive values. - * See 'Issue 6' in the link below. - * - * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html - */ - __TEP_ERRNO__START = -100000, - - TEP_ERRORS, - - __TEP_ERRNO__END, -}; -#undef _PE - -struct tep_plugin_list; - -#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) - -enum tep_plugin_load_priority { - TEP_PLUGIN_FIRST, - TEP_PLUGIN_LAST, -}; - -int tep_add_plugin_path(struct tep_handle *tep, char *path, - enum tep_plugin_load_priority prio); -struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); -void tep_unload_plugins(struct tep_plugin_list *plugin_list, - struct tep_handle *tep); -void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data); -char **tep_plugin_list_options(void); -void tep_plugin_free_options_list(char **list); -int tep_plugin_add_options(const char *name, - struct tep_plugin_option *options); -int tep_plugin_add_option(const char *name, const char *val); -void tep_plugin_remove_options(struct tep_plugin_option *options); -void tep_plugin_print_options(struct trace_seq *s); -void tep_print_plugins(struct trace_seq *s, - const char *prefix, const char *suffix, - const struct tep_plugin_list *list); - -/* tep_handle */ -typedef char *(tep_func_resolver_t)(void *priv, - unsigned long long *addrp, char **modp); -void tep_set_flag(struct tep_handle *tep, int flag); -void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); -bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags); - -static inline int tep_is_bigendian(void) -{ - unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; - unsigned int val; - - memcpy(&val, str, 4); - return val == 0x01020304; -} - -/* taken from kernel/trace/trace.h */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, -}; - -int tep_set_function_resolver(struct tep_handle *tep, - tep_func_resolver_t *func, void *priv); -void tep_reset_function_resolver(struct tep_handle *tep); -int tep_register_comm(struct tep_handle *tep, const char *comm, int pid); -int tep_override_comm(struct tep_handle *tep, const char *comm, int pid); -int tep_register_function(struct tep_handle *tep, char *name, - unsigned long long addr, char *mod); -int tep_register_print_string(struct tep_handle *tep, const char *fmt, - unsigned long long addr); -bool tep_is_pid_registered(struct tep_handle *tep, int pid); - -struct tep_event *tep_get_event(struct tep_handle *tep, int index); - -#define TEP_PRINT_INFO "INFO" -#define TEP_PRINT_INFO_RAW "INFO_RAW" -#define TEP_PRINT_COMM "COMM" -#define TEP_PRINT_LATENCY "LATENCY" -#define TEP_PRINT_NAME "NAME" -#define TEP_PRINT_PID 1U -#define TEP_PRINT_TIME 2U -#define TEP_PRINT_CPU 3U - -void tep_print_event(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, const char *fmt, ...) - __attribute__ ((format (printf, 4, 5))); - -int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, - int long_size); - -enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys); -enum tep_errno tep_parse_format(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, - unsigned long size, const char *sys); - -void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - int *len, int err); - -int tep_get_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); -int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); -int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); - -int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err); - -int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err); - -enum tep_reg_handler { - TEP_REGISTER_SUCCESS = 0, - TEP_REGISTER_SUCCESS_OVERWRITE, -}; - -int tep_register_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context); -int tep_unregister_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context); -int tep_register_print_function(struct tep_handle *tep, - tep_func_handler func, - enum tep_func_arg_type ret_type, - char *name, ...); -int tep_unregister_print_function(struct tep_handle *tep, - tep_func_handler func, char *name); - -struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name); -struct tep_format_field *tep_find_field(struct tep_event *event, const char *name); -struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name); - -const char *tep_find_function(struct tep_handle *tep, unsigned long long addr); -unsigned long long -tep_find_function_address(struct tep_handle *tep, unsigned long long addr); -unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size); -int tep_read_number_field(struct tep_format_field *field, const void *data, - unsigned long long *value); - -struct tep_event *tep_get_first_event(struct tep_handle *tep); -int tep_get_events_count(struct tep_handle *tep); -struct tep_event *tep_find_event(struct tep_handle *tep, int id); - -struct tep_event * -tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name); -struct tep_event * -tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record); - -int tep_data_type(struct tep_handle *tep, struct tep_record *rec); -int tep_data_pid(struct tep_handle *tep, struct tep_record *rec); -int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec); -int tep_data_flags(struct tep_handle *tep, struct tep_record *rec); -const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid); -struct tep_cmdline; -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, - struct tep_cmdline *next); -int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline); - -void tep_print_field(struct trace_seq *s, void *data, - struct tep_format_field *field); -void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event *event); -int tep_strerror(struct tep_handle *tep, enum tep_errno errnum, - char *buf, size_t buflen); - -struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type); -struct tep_event **tep_list_events_copy(struct tep_handle *tep, - enum tep_event_sort_type); -struct tep_format_field **tep_event_common_fields(struct tep_event *event); -struct tep_format_field **tep_event_fields(struct tep_event *event); - -enum tep_endian { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; -int tep_get_cpus(struct tep_handle *tep); -void tep_set_cpus(struct tep_handle *tep, int cpus); -int tep_get_long_size(struct tep_handle *tep); -void tep_set_long_size(struct tep_handle *tep, int long_size); -int tep_get_page_size(struct tep_handle *tep); -void tep_set_page_size(struct tep_handle *tep, int _page_size); -bool tep_is_file_bigendian(struct tep_handle *tep); -void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian); -bool tep_is_local_bigendian(struct tep_handle *tep); -void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian); -int tep_get_header_page_size(struct tep_handle *tep); -int tep_get_header_timestamp_size(struct tep_handle *tep); -bool tep_is_old_format(struct tep_handle *tep); -void tep_set_test_filters(struct tep_handle *tep, int test_filters); - -struct tep_handle *tep_alloc(void); -void tep_free(struct tep_handle *tep); -void tep_ref(struct tep_handle *tep); -void tep_unref(struct tep_handle *tep); -int tep_get_ref(struct tep_handle *tep); - -/* for debugging */ -void tep_print_funcs(struct tep_handle *tep); -void tep_print_printk(struct tep_handle *tep); - -/* ----------------------- filtering ----------------------- */ - -enum tep_filter_boolean_type { - TEP_FILTER_FALSE, - TEP_FILTER_TRUE, -}; - -enum tep_filter_op_type { - TEP_FILTER_OP_AND = 1, - TEP_FILTER_OP_OR, - TEP_FILTER_OP_NOT, -}; - -enum tep_filter_cmp_type { - TEP_FILTER_CMP_NONE, - TEP_FILTER_CMP_EQ, - TEP_FILTER_CMP_NE, - TEP_FILTER_CMP_GT, - TEP_FILTER_CMP_LT, - TEP_FILTER_CMP_GE, - TEP_FILTER_CMP_LE, - TEP_FILTER_CMP_MATCH, - TEP_FILTER_CMP_NOT_MATCH, - TEP_FILTER_CMP_REGEX, - TEP_FILTER_CMP_NOT_REGEX, -}; - -enum tep_filter_exp_type { - TEP_FILTER_EXP_NONE, - TEP_FILTER_EXP_ADD, - TEP_FILTER_EXP_SUB, - TEP_FILTER_EXP_MUL, - TEP_FILTER_EXP_DIV, - TEP_FILTER_EXP_MOD, - TEP_FILTER_EXP_RSHIFT, - TEP_FILTER_EXP_LSHIFT, - TEP_FILTER_EXP_AND, - TEP_FILTER_EXP_OR, - TEP_FILTER_EXP_XOR, - TEP_FILTER_EXP_NOT, -}; - -enum tep_filter_arg_type { - TEP_FILTER_ARG_NONE, - TEP_FILTER_ARG_BOOLEAN, - TEP_FILTER_ARG_VALUE, - TEP_FILTER_ARG_FIELD, - TEP_FILTER_ARG_EXP, - TEP_FILTER_ARG_OP, - TEP_FILTER_ARG_NUM, - TEP_FILTER_ARG_STR, -}; - -enum tep_filter_value_type { - TEP_FILTER_NUMBER, - TEP_FILTER_STRING, - TEP_FILTER_CHAR -}; - -struct tep_filter_arg; - -struct tep_filter_arg_boolean { - enum tep_filter_boolean_type value; -}; - -struct tep_filter_arg_field { - struct tep_format_field *field; -}; - -struct tep_filter_arg_value { - enum tep_filter_value_type type; - union { - char *str; - unsigned long long val; - }; -}; - -struct tep_filter_arg_op { - enum tep_filter_op_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_exp { - enum tep_filter_exp_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_num { - enum tep_filter_cmp_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_str { - enum tep_filter_cmp_type type; - struct tep_format_field *field; - char *val; - char *buffer; - regex_t reg; -}; - -struct tep_filter_arg { - enum tep_filter_arg_type type; - union { - struct tep_filter_arg_boolean boolean; - struct tep_filter_arg_field field; - struct tep_filter_arg_value value; - struct tep_filter_arg_op op; - struct tep_filter_arg_exp exp; - struct tep_filter_arg_num num; - struct tep_filter_arg_str str; - }; -}; - -struct tep_filter_type { - int event_id; - struct tep_event *event; - struct tep_filter_arg *filter; -}; - -#define TEP_FILTER_ERROR_BUFSZ 1024 - -struct tep_event_filter { - struct tep_handle *tep; - int filters; - struct tep_filter_type *event_filters; - char error_buffer[TEP_FILTER_ERROR_BUFSZ]; -}; - -struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep); - -/* for backward compatibility */ -#define FILTER_NONE TEP_ERRNO__NO_FILTER -#define FILTER_NOEXIST TEP_ERRNO__FILTER_NOT_FOUND -#define FILTER_MISS TEP_ERRNO__FILTER_MISS -#define FILTER_MATCH TEP_ERRNO__FILTER_MATCH - -enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, - const char *filter_str); - -enum tep_errno tep_filter_match(struct tep_event_filter *filter, - struct tep_record *record); - -int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err, - char *buf, size_t buflen); - -int tep_event_filtered(struct tep_event_filter *filter, - int event_id); - -void tep_filter_reset(struct tep_event_filter *filter); - -void tep_filter_free(struct tep_event_filter *filter); - -char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); - -int tep_filter_remove_event(struct tep_event_filter *filter, - int event_id); - -int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source); - -int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2); - -#endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c deleted file mode 100644 index e7f93d5fe4fd..000000000000 --- a/tools/lib/traceevent/event-plugin.c +++ /dev/null @@ -1,711 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" -#include "trace-seq.h" - -#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/" - -static struct registered_plugin_options { - struct registered_plugin_options *next; - struct tep_plugin_option *options; -} *registered_options; - -static struct trace_plugin_options { - struct trace_plugin_options *next; - char *plugin; - char *option; - char *value; -} *trace_plugin_options; - -struct tep_plugin_list { - struct tep_plugin_list *next; - char *name; - void *handle; -}; - -struct tep_plugins_dir { - struct tep_plugins_dir *next; - char *path; - enum tep_plugin_load_priority prio; -}; - -static void lower_case(char *str) -{ - if (!str) - return; - for (; *str; str++) - *str = tolower(*str); -} - -static int update_option_value(struct tep_plugin_option *op, const char *val) -{ - char *op_val; - - if (!val) { - /* toggle, only if option is boolean */ - if (op->value) - /* Warn? */ - return 0; - op->set ^= 1; - return 0; - } - - /* - * If the option has a value then it takes a string - * otherwise the option is a boolean. - */ - if (op->value) { - op->value = val; - return 0; - } - - /* Option is boolean, must be either "1", "0", "true" or "false" */ - - op_val = strdup(val); - if (!op_val) - return -1; - lower_case(op_val); - - if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) - op->set = 1; - else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) - op->set = 0; - free(op_val); - - return 0; -} - -/** - * tep_plugin_list_options - get list of plugin options - * - * Returns an array of char strings that list the currently registered - * plugin options in the format of :