summaryrefslogtreecommitdiff
path: root/tools/perf/arch/x86/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/arch/x86/util')
-rw-r--r--tools/perf/arch/x86/util/Build10
-rw-r--r--tools/perf/arch/x86/util/archinsn.c27
-rw-r--r--tools/perf/arch/x86/util/event.c46
-rw-r--r--tools/perf/arch/x86/util/evlist.c66
-rw-r--r--tools/perf/arch/x86/util/evsel.c160
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c25
-rw-r--r--tools/perf/arch/x86/util/iostat.c6
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c213
-rw-r--r--tools/perf/arch/x86/util/mem-events.c6
-rw-r--r--tools/perf/arch/x86/util/mem-events.h1
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c330
-rw-r--r--tools/perf/arch/x86/util/pmu.c288
-rw-r--r--tools/perf/arch/x86/util/topdown.c62
-rw-r--r--tools/perf/arch/x86/util/topdown.h6
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c54
15 files changed, 518 insertions, 782 deletions
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 848327378694..b94c91984c66 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,8 +1,6 @@
perf-util-y += header.o
perf-util-y += tsc.o
perf-util-y += pmu.o
-perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
-perf-util-y += perf_regs.o
perf-util-y += topdown.o
perf-util-y += machine.o
perf-util-y += event.o
@@ -12,9 +10,7 @@ perf-util-y += evsel.o
perf-util-y += iostat.o
perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
-perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-util-$(CONFIG_AUXTRACE) += archinsn.o
-perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-util-y += auxtrace.o
+perf-util-y += intel-pt.o
+perf-util-y += intel-bts.o
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
deleted file mode 100644
index 546feda08428..000000000000
--- a/tools/perf/arch/x86/util/archinsn.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "archinsn.h"
-#include "event.h"
-#include "machine.h"
-#include "thread.h"
-#include "symbol.h"
-#include "../../../../arch/x86/include/asm/insn.h"
-
-void arch_fetch_insn(struct perf_sample *sample,
- struct thread *thread,
- struct machine *machine)
-{
- struct insn insn;
- int len, ret;
- bool is64bit = false;
-
- if (!sample->ip)
- return;
- len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit);
- if (len <= 0)
- return;
-
- ret = insn_decode(&insn, sample->insn, len,
- is64bit ? INSN_MODE_64 : INSN_MODE_32);
- if (ret >= 0 && insn.length <= len)
- sample->insn_len = insn.length;
-}
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index a0400707180c..3cd384317739 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -91,49 +91,3 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
}
#endif
-
-void arch_perf_parse_sample_weight(struct perf_sample *data,
- const __u64 *array, u64 type)
-{
- union perf_sample_weight weight;
-
- weight.full = *array;
- if (type & PERF_SAMPLE_WEIGHT)
- data->weight = weight.full;
- else {
- data->weight = weight.var1_dw;
- data->ins_lat = weight.var2_w;
- data->retire_lat = weight.var3_w;
- }
-}
-
-void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
- __u64 *array, u64 type)
-{
- *array = data->weight;
-
- if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
- *array &= 0xffffffff;
- *array |= ((u64)data->ins_lat << 32);
- *array |= ((u64)data->retire_lat << 48);
- }
-}
-
-const char *arch_perf_header_entry(const char *se_header)
-{
- if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
- return "Local Retire Latency";
- else if (!strcmp(se_header, "Pipeline Stage Cycle"))
- return "Retire Latency";
-
- return se_header;
-}
-
-int arch_support_sort_key(const char *sort_key)
-{
- if (!strcmp(sort_key, "p_stage_cyc"))
- return 1;
- if (!strcmp(sort_key, "local_p_stage_cyc"))
- return 1;
- return 0;
-}
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 447a734e591c..75e9d00a1494 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -39,28 +39,13 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
* 26,319,024 slots
* 2,427,791 instructions
* 2,683,508 topdown-retiring
- *
- * If slots event and topdown metrics events are not in same group, the
- * topdown metrics events must be first event after the slots event group,
- * otherwise topdown metrics events can't be regrouped correctly, e.g.
- *
- * a. perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1
- * WARNING: events were regrouped to match PMUs
- * Performance counter stats for 'CPU(s) 0':
- * 17,923,134 slots
- * 2,154,855 instructions
- * 3,015,058 cycles
- * <not supported> topdown-retiring
- *
- * If slots event and topdown metrics events are in two groups, the group which
- * has topdown metrics events must contain only the topdown metrics event,
- * otherwise topdown metrics event can't be regrouped correctly as well, e.g.
- *
- * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}" -C0 sleep 1
+ * e. slots event and metrics event are not in a group and not adjacent
+ * perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1
* WARNING: events were regrouped to match PMUs
- * Error:
- * The sys_perf_event_open() syscall returned with 22 (Invalid argument) for
- * event (topdown-retiring)
+ * 68,433,522 slots
+ * 8,856,102 topdown-retiring
+ * 7,791,494 instructions
+ * 11,469,513 cycles
*/
if (topdown_sys_has_perf_metrics() &&
(arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
@@ -76,12 +61,15 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
* topdown metrics events are already in same group with slots
* event, do nothing.
*/
- if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs) &&
- lhs->core.leader != rhs->core.leader)
- return -1;
- if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs) &&
- lhs->core.leader != rhs->core.leader)
- return 1;
+ if (lhs->core.leader != rhs->core.leader) {
+ bool lhs_topdown = arch_is_topdown_metrics(lhs);
+ bool rhs_topdown = arch_is_topdown_metrics(rhs);
+
+ if (lhs_topdown && !rhs_topdown)
+ return -1;
+ if (!lhs_topdown && rhs_topdown)
+ return 1;
+ }
}
/* Retire latency event should not be group leader*/
@@ -93,3 +81,27 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
/* Default ordering by insertion index. */
return lhs->core.idx - rhs->core.idx;
}
+
+int arch_evlist__add_required_events(struct list_head *list)
+{
+ struct evsel *pos, *metric_event = NULL;
+ int idx = 0;
+
+ if (!topdown_sys_has_perf_metrics())
+ return 0;
+
+ list_for_each_entry(pos, list, core.node) {
+ if (arch_is_topdown_slots(pos)) {
+ /* Slots event already present, nothing to do. */
+ return 0;
+ }
+ if (metric_event == NULL && arch_is_topdown_metrics(pos))
+ metric_event = pos;
+ idx++;
+ }
+ if (metric_event == NULL) {
+ /* No topdown metric events, nothing to do. */
+ return 0;
+ }
+ return topdown_insert_slots_event(list, idx + 1, metric_event);
+}
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 3dd29ba2c23b..23a8e662a912 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -1,10 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include "util/evlist.h"
#include "util/evsel.h"
+#include "util/evsel_config.h"
#include "util/env.h"
#include "util/pmu.h"
#include "util/pmus.h"
+#include "util/stat.h"
+#include "util/strbuf.h"
#include "linux/string.h"
#include "topdown.h"
#include "evsel.h"
@@ -23,47 +28,25 @@ void arch_evsel__set_sample_weight(struct evsel *evsel)
bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
{
struct perf_pmu *pmu;
- u32 type = evsel->core.attr.type;
- /*
- * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU
- * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine.
- * The slots event is only available for the core PMU, which
- * supports the perf metrics feature.
- * Checking both the PERF_TYPE_RAW type and the slots event
- * should be good enough to detect the perf metrics feature.
- */
-again:
- switch (type) {
- case PERF_TYPE_HARDWARE:
- case PERF_TYPE_HW_CACHE:
- type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
- if (type)
- goto again;
- break;
- case PERF_TYPE_RAW:
- break;
- default:
+ if (!topdown_sys_has_perf_metrics())
return false;
- }
- pmu = evsel->pmu;
- if (pmu && perf_pmu__is_fake(pmu))
- pmu = NULL;
-
- if (!pmu) {
- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
- if (pmu->type == PERF_TYPE_RAW)
- break;
- }
- }
- return pmu && perf_pmu__have_event(pmu, "slots");
+ /*
+ * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU on a
+ * non-hybrid machine, "cpu_core" PMU on a hybrid machine. The
+ * topdown_sys_has_perf_metrics checks the slots event is only available
+ * for the core PMU, which supports the perf metrics feature. Checking
+ * both the PERF_TYPE_RAW type and the slots event should be good enough
+ * to detect the perf metrics feature.
+ */
+ pmu = evsel__find_pmu(evsel);
+ return pmu && pmu->type == PERF_TYPE_RAW;
}
bool arch_evsel__must_be_in_group(const struct evsel *evsel)
{
- if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name ||
- strcasestr(evsel->name, "uops_retired.slots"))
+ if (!evsel__sys_has_perf_metrics(evsel))
return false;
return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel);
@@ -89,6 +72,57 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
event_name);
}
+void arch_evsel__apply_ratio_to_prev(struct evsel *evsel,
+ struct perf_event_attr *attr)
+{
+ struct perf_event_attr *prev_attr = NULL;
+ struct evsel *evsel_prev = NULL;
+ const char *name = "acr_mask";
+ int evsel_idx = 0;
+ __u64 ev_mask, pr_ev_mask;
+
+ if (!perf_pmu__has_format(evsel->pmu, name)) {
+ pr_err("'%s' does not have acr_mask format support\n", evsel->pmu->name);
+ return;
+ }
+ if (perf_pmu__format_type(evsel->pmu, name) !=
+ PERF_PMU_FORMAT_VALUE_CONFIG2) {
+ pr_err("'%s' does not have config2 format support\n", evsel->pmu->name);
+ return;
+ }
+
+ evsel_prev = evsel__prev(evsel);
+ if (!evsel_prev) {
+ pr_err("Previous event does not exist.\n");
+ return;
+ }
+
+ prev_attr = &evsel_prev->core.attr;
+
+ if (prev_attr->config2) {
+ pr_err("'%s' has set config2 (acr_mask?) already, configuration not supported\n", evsel_prev->name);
+ return;
+ }
+
+ /*
+ * acr_mask (config2) is calculated using the event's index in
+ * the event group. The first event will use the index of the
+ * second event as its mask (e.g., 0x2), indicating that the
+ * second event counter will be reset and a sample taken for
+ * the first event if its counter overflows. The second event
+ * will use the mask consisting of the first and second bits
+ * (e.g., 0x3), meaning both counters will be reset if the
+ * second event counter overflows.
+ */
+
+ evsel_idx = evsel__group_idx(evsel);
+ ev_mask = 1ull << evsel_idx;
+ pr_ev_mask = 1ull << (evsel_idx - 1);
+
+ prev_attr->config2 = ev_mask;
+ attr->config2 = ev_mask | pr_ev_mask;
+}
+
static void ibs_l3miss_warn(void)
{
pr_warning(
@@ -124,13 +158,15 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
}
}
-int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
+static int amd_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
{
- if (!x86__is_amd_cpu())
+ struct perf_pmu *pmu;
+
+ if (evsel->core.attr.precise_ip == 0)
return 0;
- if (!evsel->core.attr.precise_ip &&
- !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3)))
+ pmu = evsel__find_pmu(evsel);
+ if (!pmu || strncmp(pmu->name, "ibs", 3))
return 0;
/* More verbose IBS errors. */
@@ -140,6 +176,54 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. Try "
"again without the privilege modifiers (like 'k') at the end.");
}
+ return 0;
+}
+
+static int intel_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret;
+ if (err != EINVAL)
+ return 0;
+
+ if (!topdown_sys_has_perf_metrics())
+ return 0;
+
+ if (arch_is_topdown_slots(evsel)) {
+ if (!evsel__is_group_leader(evsel)) {
+ evlist__uniquify_evsel_names(evsel->evlist, &stat_config);
+ evlist__format_evsels(evsel->evlist, &sb, 2048);
+ ret = scnprintf(msg, size, "Topdown slots event can only be group leader "
+ "in '%s'.", sb.buf);
+ strbuf_release(&sb);
+ return ret;
+ }
+ } else if (arch_is_topdown_metrics(evsel)) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos == evsel || !arch_is_topdown_metrics(pos))
+ continue;
+
+ if (pos->core.attr.config != evsel->core.attr.config)
+ continue;
+
+ evlist__uniquify_evsel_names(evsel->evlist, &stat_config);
+ evlist__format_evsels(evsel->evlist, &sb, 2048);
+ ret = scnprintf(msg, size, "Perf metric event '%s' is duplicated "
+ "in the same group (only one event is allowed) in '%s'.",
+ evsel__name(evsel), sb.buf);
+ strbuf_release(&sb);
+ return ret;
+ }
+ }
return 0;
}
+
+int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size)
+{
+ return x86__is_amd_cpu()
+ ? amd_evsel__open_strerror(evsel, msg, size)
+ : intel_evsel__open_strerror(evsel, err, msg, size);
+}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 8f235d8b67b6..c131a727774f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -12,13 +12,13 @@
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <linux/err.h>
-#include <cpuid.h>
#include "../../../util/session.h"
#include "../../../util/event.h"
#include "../../../util/evlist.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
+#include "../../../util/config.h"
#include "../../../util/cpumap.h"
#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
@@ -33,6 +33,7 @@
#include <internal/lib.h> // page_size
#include "../../../util/intel-pt.h"
#include <api/fs/fs.h>
+#include "cpuid.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -52,6 +53,7 @@ struct intel_pt_recording {
struct perf_pmu *intel_pt_pmu;
int have_sched_switch;
struct evlist *evlist;
+ bool all_switch_events;
bool snapshot_mode;
bool snapshot_init_done;
size_t snapshot_size;
@@ -70,7 +72,7 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
int err;
parse_events_terms__init(&terms);
- err = parse_events_terms(&terms, str, /*input=*/ NULL);
+ err = parse_events_terms(&terms, str);
if (err)
goto out_free;
@@ -309,7 +311,7 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ cpuid(0x15, 0, &eax, &ebx, &ecx, &edx);
*n = ebx;
*d = eax;
}
@@ -662,8 +664,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
return 0;
if (opts->auxtrace_sample_mode)
- evsel__set_config_if_unset(intel_pt_pmu, intel_pt_evsel,
- "psb_period", 0);
+ evsel__set_config_if_unset(intel_pt_evsel, "psb_period", 0);
err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
if (err)
@@ -794,7 +795,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
- if (!cpu_wide && perf_can_record_cpu_wide()) {
+ if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
@@ -1178,6 +1179,16 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
return rdtsc();
}
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+ struct intel_pt_recording *ptr = data;
+
+ if (!strcmp(var, "intel-pt.all-switch-events"))
+ ptr->all_switch_events = perf_config_bool(var, value);
+
+ return 0;
+}
+
struct auxtrace_record *intel_pt_recording_init(int *err)
{
struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
@@ -1197,6 +1208,8 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
return NULL;
}
+ perf_config(intel_pt_perf_config, ptr);
+
ptr->intel_pt_pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index 366b44d0bb7e..7442a2cd87ed 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -32,7 +32,7 @@
#define MAX_PATH 1024
#endif
-#define UNCORE_IIO_PMU_PATH "devices/uncore_iio_%d"
+#define UNCORE_IIO_PMU_PATH "bus/event_source/devices/uncore_iio_%d"
#define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH
#define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d"
@@ -403,6 +403,10 @@ void iostat_prefix(struct evlist *evlist,
struct iio_root_port *rp = evlist->selected->priv;
if (rp) {
+ /*
+ * TODO: This is the incorrect format in JSON mode.
+ * See prepare_timestamp()
+ */
if (ts)
sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s",
ts->tv_sec, ts->tv_nsec,
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
deleted file mode 100644
index 424716518b75..000000000000
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ /dev/null
@@ -1,213 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <string.h>
-#include "../../../util/kvm-stat.h"
-#include "../../../util/evsel.h"
-#include <asm/svm.h>
-#include <asm/vmx.h>
-#include <asm/kvm.h>
-
-define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
-define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
-
-static struct kvm_events_ops exit_events = {
- .is_begin_event = exit_event_begin,
- .is_end_event = exit_event_end,
- .decode_key = exit_event_decode_key,
- .name = "VM-EXIT"
-};
-
-const char *vcpu_id_str = "vcpu_id";
-const char *kvm_exit_reason = "exit_reason";
-const char *kvm_entry_trace = "kvm:kvm_entry";
-const char *kvm_exit_trace = "kvm:kvm_exit";
-
-/*
- * For the mmio events, we treat:
- * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
- * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
- */
-static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample,
- struct event_key *key)
-{
- key->key = evsel__intval(evsel, sample, "gpa");
- key->info = evsel__intval(evsel, sample, "type");
-}
-
-#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
-#define KVM_TRACE_MMIO_READ 1
-#define KVM_TRACE_MMIO_WRITE 2
-
-static bool mmio_event_begin(struct evsel *evsel,
- struct perf_sample *sample, struct event_key *key)
-{
- /* MMIO read begin event in kernel. */
- if (kvm_exit_event(evsel))
- return true;
-
- /* MMIO write begin event in kernel. */
- if (evsel__name_is(evsel, "kvm:kvm_mmio") &&
- evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
- mmio_event_get_key(evsel, sample, key);
- return true;
- }
-
- return false;
-}
-
-static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample,
- struct event_key *key)
-{
- /* MMIO write end event in kernel. */
- if (kvm_entry_event(evsel))
- return true;
-
- /* MMIO read end event in kernel.*/
- if (evsel__name_is(evsel, "kvm:kvm_mmio") &&
- evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
- mmio_event_get_key(evsel, sample, key);
- return true;
- }
-
- return false;
-}
-
-static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
- struct event_key *key,
- char *decode)
-{
- scnprintf(decode, KVM_EVENT_NAME_LEN, "%#lx:%s",
- (unsigned long)key->key,
- key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
-}
-
-static struct kvm_events_ops mmio_events = {
- .is_begin_event = mmio_event_begin,
- .is_end_event = mmio_event_end,
- .decode_key = mmio_event_decode_key,
- .name = "MMIO Access"
-};
-
- /* The time of emulation pio access is from kvm_pio to kvm_entry. */
-static void ioport_event_get_key(struct evsel *evsel,
- struct perf_sample *sample,
- struct event_key *key)
-{
- key->key = evsel__intval(evsel, sample, "port");
- key->info = evsel__intval(evsel, sample, "rw");
-}
-
-static bool ioport_event_begin(struct evsel *evsel,
- struct perf_sample *sample,
- struct event_key *key)
-{
- if (evsel__name_is(evsel, "kvm:kvm_pio")) {
- ioport_event_get_key(evsel, sample, key);
- return true;
- }
-
- return false;
-}
-
-static bool ioport_event_end(struct evsel *evsel,
- struct perf_sample *sample __maybe_unused,
- struct event_key *key __maybe_unused)
-{
- return kvm_entry_event(evsel);
-}
-
-static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
- struct event_key *key,
- char *decode)
-{
- scnprintf(decode, KVM_EVENT_NAME_LEN, "%#llx:%s",
- (unsigned long long)key->key,
- key->info ? "POUT" : "PIN");
-}
-
-static struct kvm_events_ops ioport_events = {
- .is_begin_event = ioport_event_begin,
- .is_end_event = ioport_event_end,
- .decode_key = ioport_event_decode_key,
- .name = "IO Port Access"
-};
-
- /* The time of emulation msr is from kvm_msr to kvm_entry. */
-static void msr_event_get_key(struct evsel *evsel,
- struct perf_sample *sample,
- struct event_key *key)
-{
- key->key = evsel__intval(evsel, sample, "ecx");
- key->info = evsel__intval(evsel, sample, "write");
-}
-
-static bool msr_event_begin(struct evsel *evsel,
- struct perf_sample *sample,
- struct event_key *key)
-{
- if (evsel__name_is(evsel, "kvm:kvm_msr")) {
- msr_event_get_key(evsel, sample, key);
- return true;
- }
-
- return false;
-}
-
-static bool msr_event_end(struct evsel *evsel,
- struct perf_sample *sample __maybe_unused,
- struct event_key *key __maybe_unused)
-{
- return kvm_entry_event(evsel);
-}
-
-static void msr_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
- struct event_key *key,
- char *decode)
-{
- scnprintf(decode, KVM_EVENT_NAME_LEN, "%#llx:%s",
- (unsigned long long)key->key,
- key->info ? "W" : "R");
-}
-
-static struct kvm_events_ops msr_events = {
- .is_begin_event = msr_event_begin,
- .is_end_event = msr_event_end,
- .decode_key = msr_event_decode_key,
- .name = "MSR Access"
-};
-
-const char *kvm_events_tp[] = {
- "kvm:kvm_entry",
- "kvm:kvm_exit",
- "kvm:kvm_mmio",
- "kvm:kvm_pio",
- "kvm:kvm_msr",
- NULL,
-};
-
-struct kvm_reg_events_ops kvm_reg_events_ops[] = {
- { .name = "vmexit", .ops = &exit_events },
- { .name = "mmio", .ops = &mmio_events },
- { .name = "ioport", .ops = &ioport_events },
- { .name = "msr", .ops = &msr_events },
- { NULL, NULL },
-};
-
-const char * const kvm_skip_events[] = {
- "HLT",
- NULL,
-};
-
-int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
-{
- if (strstr(cpuid, "Intel")) {
- kvm->exit_reasons = vmx_exit_reasons;
- kvm->exit_reasons_isa = "VMX";
- } else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) {
- kvm->exit_reasons = svm_exit_reasons;
- kvm->exit_reasons_isa = "SVM";
- } else
- return -ENOTSUP;
-
- return 0;
-}
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index 62df03e91c7e..b38f519020ff 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -26,3 +26,9 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
E(NULL, NULL, NULL, false, 0),
E("mem-ldst", "%s//", NULL, false, 0),
};
+
+struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL, false, 0),
+ E(NULL, NULL, NULL, false, 0),
+ E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
index f55c8d3b7d59..11e09a256f5b 100644
--- a/tools/perf/arch/x86/util/mem-events.h
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -6,5 +6,6 @@ extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
#endif /* _X86_MEM_EVENTS_H */
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
deleted file mode 100644
index 12fd93f04802..000000000000
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ /dev/null
@@ -1,330 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <string.h>
-#include <regex.h>
-#include <linux/kernel.h>
-#include <linux/zalloc.h>
-
-#include "perf_regs.h"
-#include "../../../perf-sys.h"
-#include "../../../util/perf_regs.h"
-#include "../../../util/debug.h"
-#include "../../../util/event.h"
-#include "../../../util/pmu.h"
-#include "../../../util/pmus.h"
-
-static const struct sample_reg sample_reg_masks[] = {
- SMPL_REG(AX, PERF_REG_X86_AX),
- SMPL_REG(BX, PERF_REG_X86_BX),
- SMPL_REG(CX, PERF_REG_X86_CX),
- SMPL_REG(DX, PERF_REG_X86_DX),
- SMPL_REG(SI, PERF_REG_X86_SI),
- SMPL_REG(DI, PERF_REG_X86_DI),
- SMPL_REG(BP, PERF_REG_X86_BP),
- SMPL_REG(SP, PERF_REG_X86_SP),
- SMPL_REG(IP, PERF_REG_X86_IP),
- SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
- SMPL_REG(CS, PERF_REG_X86_CS),
- SMPL_REG(SS, PERF_REG_X86_SS),
-#ifdef HAVE_ARCH_X86_64_SUPPORT
- SMPL_REG(R8, PERF_REG_X86_R8),
- SMPL_REG(R9, PERF_REG_X86_R9),
- SMPL_REG(R10, PERF_REG_X86_R10),
- SMPL_REG(R11, PERF_REG_X86_R11),
- SMPL_REG(R12, PERF_REG_X86_R12),
- SMPL_REG(R13, PERF_REG_X86_R13),
- SMPL_REG(R14, PERF_REG_X86_R14),
- SMPL_REG(R15, PERF_REG_X86_R15),
-#endif
- SMPL_REG2(XMM0, PERF_REG_X86_XMM0),
- SMPL_REG2(XMM1, PERF_REG_X86_XMM1),
- SMPL_REG2(XMM2, PERF_REG_X86_XMM2),
- SMPL_REG2(XMM3, PERF_REG_X86_XMM3),
- SMPL_REG2(XMM4, PERF_REG_X86_XMM4),
- SMPL_REG2(XMM5, PERF_REG_X86_XMM5),
- SMPL_REG2(XMM6, PERF_REG_X86_XMM6),
- SMPL_REG2(XMM7, PERF_REG_X86_XMM7),
- SMPL_REG2(XMM8, PERF_REG_X86_XMM8),
- SMPL_REG2(XMM9, PERF_REG_X86_XMM9),
- SMPL_REG2(XMM10, PERF_REG_X86_XMM10),
- SMPL_REG2(XMM11, PERF_REG_X86_XMM11),
- SMPL_REG2(XMM12, PERF_REG_X86_XMM12),
- SMPL_REG2(XMM13, PERF_REG_X86_XMM13),
- SMPL_REG2(XMM14, PERF_REG_X86_XMM14),
- SMPL_REG2(XMM15, PERF_REG_X86_XMM15),
- SMPL_REG_END
-};
-
-struct sdt_name_reg {
- const char *sdt_name;
- const char *uprobe_name;
-};
-#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
-#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
-
-static const struct sdt_name_reg sdt_reg_tbl[] = {
- SDT_NAME_REG(eax, ax),
- SDT_NAME_REG(rax, ax),
- SDT_NAME_REG(al, ax),
- SDT_NAME_REG(ah, ax),
- SDT_NAME_REG(ebx, bx),
- SDT_NAME_REG(rbx, bx),
- SDT_NAME_REG(bl, bx),
- SDT_NAME_REG(bh, bx),
- SDT_NAME_REG(ecx, cx),
- SDT_NAME_REG(rcx, cx),
- SDT_NAME_REG(cl, cx),
- SDT_NAME_REG(ch, cx),
- SDT_NAME_REG(edx, dx),
- SDT_NAME_REG(rdx, dx),
- SDT_NAME_REG(dl, dx),
- SDT_NAME_REG(dh, dx),
- SDT_NAME_REG(esi, si),
- SDT_NAME_REG(rsi, si),
- SDT_NAME_REG(sil, si),
- SDT_NAME_REG(edi, di),
- SDT_NAME_REG(rdi, di),
- SDT_NAME_REG(dil, di),
- SDT_NAME_REG(ebp, bp),
- SDT_NAME_REG(rbp, bp),
- SDT_NAME_REG(bpl, bp),
- SDT_NAME_REG(rsp, sp),
- SDT_NAME_REG(esp, sp),
- SDT_NAME_REG(spl, sp),
-
- /* rNN registers */
- SDT_NAME_REG(r8b, r8),
- SDT_NAME_REG(r8w, r8),
- SDT_NAME_REG(r8d, r8),
- SDT_NAME_REG(r9b, r9),
- SDT_NAME_REG(r9w, r9),
- SDT_NAME_REG(r9d, r9),
- SDT_NAME_REG(r10b, r10),
- SDT_NAME_REG(r10w, r10),
- SDT_NAME_REG(r10d, r10),
- SDT_NAME_REG(r11b, r11),
- SDT_NAME_REG(r11w, r11),
- SDT_NAME_REG(r11d, r11),
- SDT_NAME_REG(r12b, r12),
- SDT_NAME_REG(r12w, r12),
- SDT_NAME_REG(r12d, r12),
- SDT_NAME_REG(r13b, r13),
- SDT_NAME_REG(r13w, r13),
- SDT_NAME_REG(r13d, r13),
- SDT_NAME_REG(r14b, r14),
- SDT_NAME_REG(r14w, r14),
- SDT_NAME_REG(r14d, r14),
- SDT_NAME_REG(r15b, r15),
- SDT_NAME_REG(r15w, r15),
- SDT_NAME_REG(r15d, r15),
- SDT_NAME_REG_END,
-};
-
-/*
- * Perf only supports OP which is in +/-NUM(REG) form.
- * Here plus-minus sign, NUM and parenthesis are optional,
- * only REG is mandatory.
- *
- * SDT events also supports indirect addressing mode with a
- * symbol as offset, scaled mode and constants in OP. But
- * perf does not support them yet. Below are few examples.
- *
- * OP with scaled mode:
- * (%rax,%rsi,8)
- * 10(%ras,%rsi,8)
- *
- * OP with indirect addressing mode:
- * check_action(%rip)
- * mp_+52(%rip)
- * 44+mp_(%rip)
- *
- * OP with constant values:
- * $0
- * $123
- * $-1
- */
-#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
-
-static regex_t sdt_op_regex;
-
-static int sdt_init_op_regex(void)
-{
- static int initialized;
- int ret = 0;
-
- if (initialized)
- return 0;
-
- ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
- if (ret < 0) {
- pr_debug4("Regex compilation error.\n");
- return ret;
- }
-
- initialized = 1;
- return 0;
-}
-
-/*
- * Max x86 register name length is 5(ex: %r15d). So, 6th char
- * should always contain NULL. This helps to find register name
- * length using strlen, instead of maintaining one more variable.
- */
-#define SDT_REG_NAME_SIZE 6
-
-/*
- * The uprobe parser does not support all gas register names;
- * so, we have to replace them (ex. for x86_64: %rax -> %ax).
- * Note: If register does not require renaming, just copy
- * paste as it is, but don't leave it empty.
- */
-static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
-{
- int i = 0;
-
- for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
- if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
- strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
- return;
- }
- }
-
- strncpy(uprobe_reg, sdt_reg, sdt_len);
-}
-
-int arch_sdt_arg_parse_op(char *old_op, char **new_op)
-{
- char new_reg[SDT_REG_NAME_SIZE] = {0};
- int new_len = 0, ret;
- /*
- * rm[0]: +/-NUM(REG)
- * rm[1]: +/-
- * rm[2]: NUM
- * rm[3]: (
- * rm[4]: REG
- * rm[5]: )
- */
- regmatch_t rm[6];
- /*
- * Max prefix length is 2 as it may contains sign(+/-)
- * and displacement 0 (Both sign and displacement 0 are
- * optional so it may be empty). Use one more character
- * to hold last NULL so that strlen can be used to find
- * prefix length, instead of maintaining one more variable.
- */
- char prefix[3] = {0};
-
- ret = sdt_init_op_regex();
- if (ret < 0)
- return ret;
-
- /*
- * If unsupported OR does not match with regex OR
- * register name too long, skip it.
- */
- if (strchr(old_op, ',') || strchr(old_op, '$') ||
- regexec(&sdt_op_regex, old_op, 6, rm, 0) ||
- rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
- pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
- return SDT_ARG_SKIP;
- }
-
- /*
- * Prepare prefix.
- * If SDT OP has parenthesis but does not provide
- * displacement, add 0 for displacement.
- * SDT Uprobe Prefix
- * -----------------------------
- * +24(%rdi) +24(%di) +
- * 24(%rdi) +24(%di) +
- * %rdi %di
- * (%rdi) +0(%di) +0
- * -80(%rbx) -80(%bx) -
- */
- if (rm[3].rm_so != rm[3].rm_eo) {
- if (rm[1].rm_so != rm[1].rm_eo)
- prefix[0] = *(old_op + rm[1].rm_so);
- else if (rm[2].rm_so != rm[2].rm_eo)
- prefix[0] = '+';
- else
- scnprintf(prefix, sizeof(prefix), "+0");
- }
-
- /* Rename register */
- sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
- new_reg);
-
- /* Prepare final OP which should be valid for uprobe_events */
- new_len = strlen(prefix) +
- (rm[2].rm_eo - rm[2].rm_so) +
- (rm[3].rm_eo - rm[3].rm_so) +
- strlen(new_reg) +
- (rm[5].rm_eo - rm[5].rm_so) +
- 1; /* NULL */
-
- *new_op = zalloc(new_len);
- if (!*new_op)
- return -ENOMEM;
-
- scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
- strlen(prefix), prefix,
- (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
- (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
- strlen(new_reg), new_reg,
- (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
-
- return SDT_ARG_VALID;
-}
-
-const struct sample_reg *arch__sample_reg_masks(void)
-{
- return sample_reg_masks;
-}
-
-uint64_t arch__intr_reg_mask(void)
-{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .sample_type = PERF_SAMPLE_REGS_INTR,
- .sample_regs_intr = PERF_REG_EXTENDED_MASK,
- .precise_ip = 1,
- .disabled = 1,
- .exclude_kernel = 1,
- };
- int fd;
- /*
- * In an unnamed union, init it here to build on older gcc versions
- */
- attr.sample_period = 1;
-
- if (perf_pmus__num_core_pmus() > 1) {
- struct perf_pmu *pmu = NULL;
- __u64 type = PERF_TYPE_RAW;
-
- /*
- * The same register set is supported among different hybrid PMUs.
- * Only check the first available one.
- */
- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
- type = pmu->type;
- break;
- }
- attr.config |= type << PERF_PMU_TYPE_SHIFT;
- }
-
- event_attr_init(&attr);
-
- fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
- if (fd != -1) {
- close(fd);
- return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
- }
-
- return PERF_REGS_MASK;
-}
-
-uint64_t arch__user_reg_mask(void)
-{
- return PERF_REGS_MASK;
-}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index e0060dac2a9f..7c9d238922a6 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -5,9 +5,11 @@
#include <dirent.h>
#include <fcntl.h>
#include <linux/stddef.h>
+#include <linux/string.h>
#include <linux/perf_event.h>
-#include <linux/zalloc.h>
#include <api/fs/fs.h>
+#include <api/io_dir.h>
+#include <internal/cpumap.h>
#include <errno.h>
#include "../../../util/intel-pt.h"
@@ -16,11 +18,257 @@
#include "../../../util/fncache.h"
#include "../../../util/pmus.h"
#include "mem-events.h"
+#include "util/debug.h"
#include "util/env.h"
+#include "util/header.h"
-void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
+static bool x86__is_intel_graniterapids(void)
{
-#ifdef HAVE_AUXTRACE_SUPPORT
+ static bool checked_if_graniterapids;
+ static bool is_graniterapids;
+
+ if (!checked_if_graniterapids) {
+ const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
+ char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+ is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
+ free(cpuid);
+ checked_if_graniterapids = true;
+ }
+ return is_graniterapids;
+}
+
+static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
+{
+ struct perf_cpu_map *cpus;
+ char *buf = NULL;
+ size_t buf_len;
+
+ if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0)
+ return NULL;
+
+ cpus = perf_cpu_map__new(buf);
+ free(buf);
+ return cpus;
+}
+
+static int snc_nodes_per_l3_cache(void)
+{
+ static bool checked_snc;
+ static int snc_nodes;
+
+ if (!checked_snc) {
+ struct perf_cpu_map *node_cpus =
+ read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+ struct perf_cpu_map *cache_cpus =
+ read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+ snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+ perf_cpu_map__put(cache_cpus);
+ perf_cpu_map__put(node_cpus);
+ checked_snc = true;
+ }
+ return snc_nodes;
+}
+
+static int num_chas(void)
+{
+ static bool checked_chas;
+ static int num_chas;
+
+ if (!checked_chas) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
+
+ if (fd < 0)
+ return -1;
+
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+ if (strstarts(dent->d_name, "uncore_cha_"))
+ num_chas++;
+ }
+ close(fd);
+ checked_chas = true;
+ }
+ return num_chas;
+}
+
+#define MAX_SNCS 6
+
+static int uncore_cha_snc(struct perf_pmu *pmu)
+{
+ // CHA SNC numbers are ordered correspond to the CHAs number.
+ unsigned int cha_num;
+ int num_cha, chas_per_node, cha_snc;
+ int snc_nodes = snc_nodes_per_l3_cache();
+
+ if (snc_nodes <= 1)
+ return 0;
+
+ num_cha = num_chas();
+ if (num_cha <= 0) {
+ pr_warning("Unexpected: no CHAs found\n");
+ return 0;
+ }
+
+ /* Compute SNC for PMU. */
+ if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
+ pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
+ return 0;
+ }
+ chas_per_node = num_cha / snc_nodes;
+ cha_snc = cha_num / chas_per_node;
+
+ /* Range check cha_snc. for unexpected out of bounds. */
+ return cha_snc >= MAX_SNCS ? 0 : cha_snc;
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+ // Compute the IMC SNC using lookup tables.
+ unsigned int imc_num;
+ int snc_nodes = snc_nodes_per_l3_cache();
+ const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
+ const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
+ const u8 *snc_map;
+ size_t snc_map_len;
+
+ switch (snc_nodes) {
+ case 2:
+ snc_map = snc2_map;
+ snc_map_len = ARRAY_SIZE(snc2_map);
+ break;
+ case 3:
+ snc_map = snc3_map;
+ snc_map_len = ARRAY_SIZE(snc3_map);
+ break;
+ default:
+ /* Error or no lookup support for SNC with >3 nodes. */
+ return 0;
+ }
+
+ /* Compute SNC for PMU. */
+ if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
+ pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
+ return 0;
+ }
+ if (imc_num >= snc_map_len) {
+ pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+ return 0;
+ }
+ return snc_map[imc_num];
+}
+
+static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
+{
+ static bool checked_cpu_adjust[MAX_SNCS];
+ static int cpu_adjust[MAX_SNCS];
+ struct perf_cpu_map *node_cpus;
+ char node_path[] = "devices/system/node/node0/cpulist";
+
+ /* Was adjust already computed? */
+ if (checked_cpu_adjust[pmu_snc])
+ return cpu_adjust[pmu_snc];
+
+ /* SNC0 doesn't need an adjust. */
+ if (pmu_snc == 0) {
+ cpu_adjust[0] = 0;
+ checked_cpu_adjust[0] = true;
+ return 0;
+ }
+
+ /*
+ * Use NUMA topology to compute first CPU of the NUMA node, we want to
+ * adjust CPU 0 to be this and similarly for other CPUs if there is >1
+ * socket.
+ */
+ assert(pmu_snc >= 0 && pmu_snc <= 9);
+ node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>.
+ node_cpus = read_sysfs_cpu_map(node_path);
+ cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
+ if (cpu_adjust[pmu_snc] < 0) {
+ pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path);
+ cpu_adjust[pmu_snc] = 0;
+ } else {
+ checked_cpu_adjust[pmu_snc] = true;
+ }
+ perf_cpu_map__put(node_cpus);
+ return cpu_adjust[pmu_snc];
+}
+
+static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+{
+ // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
+ // topology. For example, a two socket graniterapids machine may be set
+ // up with 3-way SNC meaning there are 6 NUMA nodes that should be
+ // displayed with --per-node. The cpumask of the CHA and IMC PMUs
+ // reflects per-socket information meaning, for example, uncore_cha_60
+ // on a two socket graniterapids machine with 120 cores per socket will
+ // have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
+ // to reflect that uncore_cha_60 is used for the 2nd SNC of each
+ // socket. Without the adjustment events on uncore_cha_60 will appear in
+ // node 0 and node 3 (in our example 2 socket 3-way set up), but with
+ // the adjustment they will appear in node 1 and node 4. The number of
+ // CHAs is typically larger than the number of cores. The CHA numbers
+ // are assumed to split evenly and inorder wrt core numbers. There are
+ // fewer memory IMC PMUs than cores and mapping is handled using lookup
+ // tables.
+ static struct perf_cpu_map *cha_adjusted[MAX_SNCS];
+ static struct perf_cpu_map *imc_adjusted[MAX_SNCS];
+ struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
+ unsigned int idx;
+ int pmu_snc, cpu_adjust;
+ struct perf_cpu cpu;
+ bool alloc;
+
+ // Cpus from the kernel holds first CPU of each socket. e.g. 0,120.
+ if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
+ pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
+ return;
+ }
+
+ pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
+ if (pmu_snc == 0) {
+ // No adjustment necessary for the first SNC.
+ return;
+ }
+
+ alloc = adjusted[pmu_snc] == NULL;
+ if (alloc) {
+ // Hold onto the perf_cpu_map globally to avoid recomputation.
+ cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
+ adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
+ if (!adjusted[pmu_snc])
+ return;
+ }
+
+ perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
+ // Compute the new cpu map values or if not allocating, assert
+ // that they match expectations. asserts will be removed to
+ // avoid overhead in NDEBUG builds.
+ if (alloc) {
+ RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
+ } else if (idx == 0) {
+ cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
+ assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
+ } else {
+ assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
+ cpu.cpu + cpu_adjust);
+ }
+ }
+
+ perf_cpu_map__put(pmu->cpus);
+ pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+}
+
+void perf_pmu__arch_init(struct perf_pmu *pmu)
+{
+ struct perf_pmu_caps *ldlat_cap;
+
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
pmu->selectable = true;
@@ -30,15 +278,33 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
pmu->auxtrace = true;
pmu->selectable = true;
}
-#endif
if (x86__is_amd_cpu()) {
- if (!strcmp(pmu->name, "ibs_op"))
- pmu->mem_events = perf_mem_events_amd;
- } else if (pmu->is_core) {
- if (perf_pmu__have_event(pmu, "mem-loads-aux"))
- pmu->mem_events = perf_mem_events_intel_aux;
- else
- pmu->mem_events = perf_mem_events_intel;
+ if (strcmp(pmu->name, "ibs_op"))
+ return;
+
+ pmu->mem_events = perf_mem_events_amd;
+
+ if (!perf_pmu__caps_parse(pmu))
+ return;
+
+ ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
+ if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
+ return;
+
+ perf_mem_events__loads_ldlat = 0;
+ pmu->mem_events = perf_mem_events_amd_ldlat;
+ } else {
+ if (pmu->is_core) {
+ if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+ pmu->mem_events = perf_mem_events_intel_aux;
+ else
+ pmu->mem_events = perf_mem_events_intel;
+ } else if (x86__is_intel_graniterapids()) {
+ if (strstarts(pmu->name, "uncore_cha_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+ else if (strstarts(pmu->name, "uncore_imc_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+ }
}
}
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index f63747d0abdf..bafd285119d7 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include "api/fs/fs.h"
-#include "util/evsel.h"
+#include <errno.h>
#include "util/evlist.h"
#include "util/pmu.h"
#include "util/pmus.h"
@@ -8,6 +7,9 @@
#include "topdown.h"
#include "evsel.h"
+// cmask=0, inv=0, pc=0, edge=0, umask=4, event=0
+#define TOPDOWN_SLOTS 0x0400
+
/* Check whether there is a PMU which supports the perf metrics. */
bool topdown_sys_has_perf_metrics(void)
{
@@ -32,31 +34,19 @@ bool topdown_sys_has_perf_metrics(void)
return has_perf_metrics;
}
-#define TOPDOWN_SLOTS 0x0400
bool arch_is_topdown_slots(const struct evsel *evsel)
{
- if (evsel->core.attr.config == TOPDOWN_SLOTS)
- return true;
-
- return false;
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ evsel->core.attr.config == TOPDOWN_SLOTS &&
+ evsel->core.attr.config1 == 0;
}
bool arch_is_topdown_metrics(const struct evsel *evsel)
{
- int config = evsel->core.attr.config;
- const char *name_from_config;
- struct perf_pmu *pmu;
-
- /* All topdown events have an event code of 0. */
- if ((config & 0xFF) != 0)
- return false;
-
- pmu = evsel__find_pmu(evsel);
- if (!pmu || !pmu->is_core)
- return false;
-
- name_from_config = perf_pmu__name_from_config(pmu, config);
- return name_from_config && strcasestr(name_from_config, "topdown");
+ // cmask=0, inv=0, pc=0, edge=0, umask=0x80-0x87, event=0
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ (evsel->core.attr.config & 0xFFFFF8FF) == 0x8000 &&
+ evsel->core.attr.config1 == 0;
}
/*
@@ -81,10 +71,38 @@ bool arch_topdown_sample_read(struct evsel *leader)
*/
evlist__for_each_entry(leader->evlist, evsel) {
if (evsel->core.leader != leader->core.leader)
- return false;
+ continue;
if (evsel != leader && arch_is_topdown_metrics(evsel))
return true;
}
return false;
}
+
+/*
+ * Make a copy of the topdown metric event metric_event with the given index but
+ * change its configuration to be a topdown slots event. Copying from
+ * metric_event ensures modifiers are the same.
+ */
+int topdown_insert_slots_event(struct list_head *list, int idx, struct evsel *metric_event)
+{
+ struct evsel *evsel = evsel__new_idx(&metric_event->core.attr, idx);
+
+ if (!evsel)
+ return -ENOMEM;
+
+ evsel->core.attr.config = TOPDOWN_SLOTS;
+ evsel->core.cpus = perf_cpu_map__get(metric_event->core.cpus);
+ evsel->core.pmu_cpus = perf_cpu_map__get(metric_event->core.pmu_cpus);
+ evsel->core.is_pmu_core = true;
+ evsel->pmu = metric_event->pmu;
+ evsel->name = strdup("slots");
+ evsel->precise_max = metric_event->precise_max;
+ evsel->sample_read = metric_event->sample_read;
+ evsel->weak_group = metric_event->weak_group;
+ evsel->bpf_counter = metric_event->bpf_counter;
+ evsel->retire_lat = metric_event->retire_lat;
+ evsel__set_leader(evsel, evsel__leader(metric_event));
+ list_add_tail(&evsel->core.node, list);
+ return 0;
+}
diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
index 1bae9b1822d7..69035565e649 100644
--- a/tools/perf/arch/x86/util/topdown.h
+++ b/tools/perf/arch/x86/util/topdown.h
@@ -2,8 +2,14 @@
#ifndef _TOPDOWN_H
#define _TOPDOWN_H 1
+#include <stdbool.h>
+
+struct evsel;
+struct list_head;
+
bool topdown_sys_has_perf_metrics(void);
bool arch_is_topdown_slots(const struct evsel *evsel);
bool arch_is_topdown_metrics(const struct evsel *evsel);
+int topdown_insert_slots_event(struct list_head *list, int idx, struct evsel *metric_event);
#endif
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
deleted file mode 100644
index edb77e20e083..000000000000
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <elfutils/libdwfl.h>
-#include "perf_regs.h"
-#include "../../../util/unwind-libdw.h"
-#include "../../../util/perf_regs.h"
-#include "util/sample.h"
-
-bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
-{
- struct unwind_info *ui = arg;
- struct regs_dump *user_regs = &ui->sample->user_regs;
- Dwarf_Word dwarf_regs[17];
- unsigned nregs;
-
-#define REG(r) ({ \
- Dwarf_Word val = 0; \
- perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \
- val; \
-})
-
- if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
- dwarf_regs[0] = REG(AX);
- dwarf_regs[1] = REG(CX);
- dwarf_regs[2] = REG(DX);
- dwarf_regs[3] = REG(BX);
- dwarf_regs[4] = REG(SP);
- dwarf_regs[5] = REG(BP);
- dwarf_regs[6] = REG(SI);
- dwarf_regs[7] = REG(DI);
- dwarf_regs[8] = REG(IP);
- nregs = 9;
- } else {
- dwarf_regs[0] = REG(AX);
- dwarf_regs[1] = REG(DX);
- dwarf_regs[2] = REG(CX);
- dwarf_regs[3] = REG(BX);
- dwarf_regs[4] = REG(SI);
- dwarf_regs[5] = REG(DI);
- dwarf_regs[6] = REG(BP);
- dwarf_regs[7] = REG(SP);
- dwarf_regs[8] = REG(R8);
- dwarf_regs[9] = REG(R9);
- dwarf_regs[10] = REG(R10);
- dwarf_regs[11] = REG(R11);
- dwarf_regs[12] = REG(R12);
- dwarf_regs[13] = REG(R13);
- dwarf_regs[14] = REG(R14);
- dwarf_regs[15] = REG(R15);
- dwarf_regs[16] = REG(IP);
- nregs = 17;
- }
-
- return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
-}