diff options
Diffstat (limited to 'tools/perf/arch/x86/util')
| -rw-r--r-- | tools/perf/arch/x86/util/Build | 10 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/archinsn.c | 27 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/event.c | 46 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/evlist.c | 66 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/evsel.c | 160 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/intel-pt.c | 25 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/iostat.c | 6 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/kvm-stat.c | 213 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/mem-events.c | 6 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/mem-events.h | 1 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/perf_regs.c | 330 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/pmu.c | 288 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/topdown.c | 62 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/topdown.h | 6 | ||||
| -rw-r--r-- | tools/perf/arch/x86/util/unwind-libdw.c | 54 |
15 files changed, 518 insertions, 782 deletions
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 848327378694..b94c91984c66 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,8 +1,6 @@ perf-util-y += header.o perf-util-y += tsc.o perf-util-y += pmu.o -perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-y += perf_regs.o perf-util-y += topdown.o perf-util-y += machine.o perf-util-y += event.o @@ -12,9 +10,7 @@ perf-util-y += evsel.o perf-util-y += iostat.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -perf-util-$(CONFIG_AUXTRACE) += auxtrace.o -perf-util-$(CONFIG_AUXTRACE) += archinsn.o -perf-util-$(CONFIG_AUXTRACE) += intel-pt.o -perf-util-$(CONFIG_AUXTRACE) += intel-bts.o +perf-util-y += auxtrace.o +perf-util-y += intel-pt.o +perf-util-y += intel-bts.o diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c deleted file mode 100644 index 546feda08428..000000000000 --- a/tools/perf/arch/x86/util/archinsn.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "archinsn.h" -#include "event.h" -#include "machine.h" -#include "thread.h" -#include "symbol.h" -#include "../../../../arch/x86/include/asm/insn.h" - -void arch_fetch_insn(struct perf_sample *sample, - struct thread *thread, - struct machine *machine) -{ - struct insn insn; - int len, ret; - bool is64bit = false; - - if (!sample->ip) - return; - len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit); - if (len <= 0) - return; - - ret = insn_decode(&insn, sample->insn, len, - is64bit ? INSN_MODE_64 : INSN_MODE_32); - if (ret >= 0 && insn.length <= len) - sample->insn_len = insn.length; -} diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index a0400707180c..3cd384317739 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -91,49 +91,3 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, } #endif - -void arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, u64 type) -{ - union perf_sample_weight weight; - - weight.full = *array; - if (type & PERF_SAMPLE_WEIGHT) - data->weight = weight.full; - else { - data->weight = weight.var1_dw; - data->ins_lat = weight.var2_w; - data->retire_lat = weight.var3_w; - } -} - -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, - __u64 *array, u64 type) -{ - *array = data->weight; - - if (type & PERF_SAMPLE_WEIGHT_STRUCT) { - *array &= 0xffffffff; - *array |= ((u64)data->ins_lat << 32); - *array |= ((u64)data->retire_lat << 48); - } -} - -const char *arch_perf_header_entry(const char *se_header) -{ - if (!strcmp(se_header, "Local Pipeline Stage Cycle")) - return "Local Retire Latency"; - else if (!strcmp(se_header, "Pipeline Stage Cycle")) - return "Retire Latency"; - - return se_header; -} - -int arch_support_sort_key(const char *sort_key) -{ - if (!strcmp(sort_key, "p_stage_cyc")) - return 1; - if (!strcmp(sort_key, "local_p_stage_cyc")) - return 1; - return 0; -} diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 447a734e591c..75e9d00a1494 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -39,28 +39,13 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) * 26,319,024 slots * 2,427,791 instructions * 2,683,508 topdown-retiring - * - * If slots event and topdown metrics events are not in same group, the - * topdown metrics events must be first event after the slots event group, - * otherwise topdown metrics events can't be regrouped correctly, e.g. - * - * a. perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1 - * WARNING: events were regrouped to match PMUs - * Performance counter stats for 'CPU(s) 0': - * 17,923,134 slots - * 2,154,855 instructions - * 3,015,058 cycles - * <not supported> topdown-retiring - * - * If slots event and topdown metrics events are in two groups, the group which - * has topdown metrics events must contain only the topdown metrics event, - * otherwise topdown metrics event can't be regrouped correctly as well, e.g. - * - * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}" -C0 sleep 1 + * e. slots event and metrics event are not in a group and not adjacent + * perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1 * WARNING: events were regrouped to match PMUs - * Error: - * The sys_perf_event_open() syscall returned with 22 (Invalid argument) for - * event (topdown-retiring) + * 68,433,522 slots + * 8,856,102 topdown-retiring + * 7,791,494 instructions + * 11,469,513 cycles */ if (topdown_sys_has_perf_metrics() && (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) { @@ -76,12 +61,15 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) * topdown metrics events are already in same group with slots * event, do nothing. */ - if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs) && - lhs->core.leader != rhs->core.leader) - return -1; - if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs) && - lhs->core.leader != rhs->core.leader) - return 1; + if (lhs->core.leader != rhs->core.leader) { + bool lhs_topdown = arch_is_topdown_metrics(lhs); + bool rhs_topdown = arch_is_topdown_metrics(rhs); + + if (lhs_topdown && !rhs_topdown) + return -1; + if (!lhs_topdown && rhs_topdown) + return 1; + } } /* Retire latency event should not be group leader*/ @@ -93,3 +81,27 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) /* Default ordering by insertion index. */ return lhs->core.idx - rhs->core.idx; } + +int arch_evlist__add_required_events(struct list_head *list) +{ + struct evsel *pos, *metric_event = NULL; + int idx = 0; + + if (!topdown_sys_has_perf_metrics()) + return 0; + + list_for_each_entry(pos, list, core.node) { + if (arch_is_topdown_slots(pos)) { + /* Slots event already present, nothing to do. */ + return 0; + } + if (metric_event == NULL && arch_is_topdown_metrics(pos)) + metric_event = pos; + idx++; + } + if (metric_event == NULL) { + /* No topdown metric events, nothing to do. */ + return 0; + } + return topdown_insert_slots_event(list, idx + 1, metric_event); +} diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 3dd29ba2c23b..23a8e662a912 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -1,10 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> #include <stdio.h> #include <stdlib.h> +#include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_config.h" #include "util/env.h" #include "util/pmu.h" #include "util/pmus.h" +#include "util/stat.h" +#include "util/strbuf.h" #include "linux/string.h" #include "topdown.h" #include "evsel.h" @@ -23,47 +28,25 @@ void arch_evsel__set_sample_weight(struct evsel *evsel) bool evsel__sys_has_perf_metrics(const struct evsel *evsel) { struct perf_pmu *pmu; - u32 type = evsel->core.attr.type; - /* - * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU - * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine. - * The slots event is only available for the core PMU, which - * supports the perf metrics feature. - * Checking both the PERF_TYPE_RAW type and the slots event - * should be good enough to detect the perf metrics feature. - */ -again: - switch (type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; - if (type) - goto again; - break; - case PERF_TYPE_RAW: - break; - default: + if (!topdown_sys_has_perf_metrics()) return false; - } - pmu = evsel->pmu; - if (pmu && perf_pmu__is_fake(pmu)) - pmu = NULL; - - if (!pmu) { - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - if (pmu->type == PERF_TYPE_RAW) - break; - } - } - return pmu && perf_pmu__have_event(pmu, "slots"); + /* + * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU on a + * non-hybrid machine, "cpu_core" PMU on a hybrid machine. The + * topdown_sys_has_perf_metrics checks the slots event is only available + * for the core PMU, which supports the perf metrics feature. Checking + * both the PERF_TYPE_RAW type and the slots event should be good enough + * to detect the perf metrics feature. + */ + pmu = evsel__find_pmu(evsel); + return pmu && pmu->type == PERF_TYPE_RAW; } bool arch_evsel__must_be_in_group(const struct evsel *evsel) { - if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name || - strcasestr(evsel->name, "uops_retired.slots")) + if (!evsel__sys_has_perf_metrics(evsel)) return false; return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel); @@ -89,6 +72,57 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) event_name); } +void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, + struct perf_event_attr *attr) +{ + struct perf_event_attr *prev_attr = NULL; + struct evsel *evsel_prev = NULL; + const char *name = "acr_mask"; + int evsel_idx = 0; + __u64 ev_mask, pr_ev_mask; + + if (!perf_pmu__has_format(evsel->pmu, name)) { + pr_err("'%s' does not have acr_mask format support\n", evsel->pmu->name); + return; + } + if (perf_pmu__format_type(evsel->pmu, name) != + PERF_PMU_FORMAT_VALUE_CONFIG2) { + pr_err("'%s' does not have config2 format support\n", evsel->pmu->name); + return; + } + + evsel_prev = evsel__prev(evsel); + if (!evsel_prev) { + pr_err("Previous event does not exist.\n"); + return; + } + + prev_attr = &evsel_prev->core.attr; + + if (prev_attr->config2) { + pr_err("'%s' has set config2 (acr_mask?) already, configuration not supported\n", evsel_prev->name); + return; + } + + /* + * acr_mask (config2) is calculated using the event's index in + * the event group. The first event will use the index of the + * second event as its mask (e.g., 0x2), indicating that the + * second event counter will be reset and a sample taken for + * the first event if its counter overflows. The second event + * will use the mask consisting of the first and second bits + * (e.g., 0x3), meaning both counters will be reset if the + * second event counter overflows. + */ + + evsel_idx = evsel__group_idx(evsel); + ev_mask = 1ull << evsel_idx; + pr_ev_mask = 1ull << (evsel_idx - 1); + + prev_attr->config2 = ev_mask; + attr->config2 = ev_mask | pr_ev_mask; +} + static void ibs_l3miss_warn(void) { pr_warning( @@ -124,13 +158,15 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr) } } -int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) +static int amd_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) { - if (!x86__is_amd_cpu()) + struct perf_pmu *pmu; + + if (evsel->core.attr.precise_ip == 0) return 0; - if (!evsel->core.attr.precise_ip && - !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3))) + pmu = evsel__find_pmu(evsel); + if (!pmu || strncmp(pmu->name, "ibs", 3)) return 0; /* More verbose IBS errors. */ @@ -140,6 +176,54 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. Try " "again without the privilege modifiers (like 'k') at the end."); } + return 0; +} + +static int intel_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size) +{ + struct strbuf sb = STRBUF_INIT; + int ret; + if (err != EINVAL) + return 0; + + if (!topdown_sys_has_perf_metrics()) + return 0; + + if (arch_is_topdown_slots(evsel)) { + if (!evsel__is_group_leader(evsel)) { + evlist__uniquify_evsel_names(evsel->evlist, &stat_config); + evlist__format_evsels(evsel->evlist, &sb, 2048); + ret = scnprintf(msg, size, "Topdown slots event can only be group leader " + "in '%s'.", sb.buf); + strbuf_release(&sb); + return ret; + } + } else if (arch_is_topdown_metrics(evsel)) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos == evsel || !arch_is_topdown_metrics(pos)) + continue; + + if (pos->core.attr.config != evsel->core.attr.config) + continue; + + evlist__uniquify_evsel_names(evsel->evlist, &stat_config); + evlist__format_evsels(evsel->evlist, &sb, 2048); + ret = scnprintf(msg, size, "Perf metric event '%s' is duplicated " + "in the same group (only one event is allowed) in '%s'.", + evsel__name(evsel), sb.buf); + strbuf_release(&sb); + return ret; + } + } return 0; } + +int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size) +{ + return x86__is_amd_cpu() + ? amd_evsel__open_strerror(evsel, msg, size) + : intel_evsel__open_strerror(evsel, err, msg, size); +} diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 8f235d8b67b6..c131a727774f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -12,13 +12,13 @@ #include <linux/log2.h> #include <linux/zalloc.h> #include <linux/err.h> -#include <cpuid.h> #include "../../../util/session.h" #include "../../../util/event.h" #include "../../../util/evlist.h" #include "../../../util/evsel.h" #include "../../../util/evsel_config.h" +#include "../../../util/config.h" #include "../../../util/cpumap.h" #include "../../../util/mmap.h" #include <subcmd/parse-options.h> @@ -33,6 +33,7 @@ #include <internal/lib.h> // page_size #include "../../../util/intel-pt.h" #include <api/fs/fs.h> +#include "cpuid.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -52,6 +53,7 @@ struct intel_pt_recording { struct perf_pmu *intel_pt_pmu; int have_sched_switch; struct evlist *evlist; + bool all_switch_events; bool snapshot_mode; bool snapshot_init_done; size_t snapshot_size; @@ -70,7 +72,7 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu, int err; parse_events_terms__init(&terms); - err = parse_events_terms(&terms, str, /*input=*/ NULL); + err = parse_events_terms(&terms, str); if (err) goto out_free; @@ -309,7 +311,7 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) { unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; - __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + cpuid(0x15, 0, &eax, &ebx, &ecx, &edx); *n = ebx; *d = eax; } @@ -662,8 +664,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, return 0; if (opts->auxtrace_sample_mode) - evsel__set_config_if_unset(intel_pt_pmu, intel_pt_evsel, - "psb_period", 0); + evsel__set_config_if_unset(intel_pt_evsel, "psb_period", 0); err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); if (err) @@ -794,7 +795,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, bool cpu_wide = !target__none(&opts->target) && !target__has_task(&opts->target); - if (!cpu_wide && perf_can_record_cpu_wide()) { + if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) { struct evsel *switch_evsel; switch_evsel = evlist__add_dummy_on_all_cpus(evlist); @@ -1178,6 +1179,16 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) return rdtsc(); } +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt_recording *ptr = data; + + if (!strcmp(var, "intel-pt.all-switch-events")) + ptr->all_switch_events = perf_config_bool(var, value); + + return 0; +} + struct auxtrace_record *intel_pt_recording_init(int *err) { struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME); @@ -1197,6 +1208,8 @@ struct auxtrace_record *intel_pt_recording_init(int *err) return NULL; } + perf_config(intel_pt_perf_config, ptr); + ptr->intel_pt_pmu = intel_pt_pmu; ptr->itr.recording_options = intel_pt_recording_options; ptr->itr.info_priv_size = intel_pt_info_priv_size; diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index 366b44d0bb7e..7442a2cd87ed 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -32,7 +32,7 @@ #define MAX_PATH 1024 #endif -#define UNCORE_IIO_PMU_PATH "devices/uncore_iio_%d" +#define UNCORE_IIO_PMU_PATH "bus/event_source/devices/uncore_iio_%d" #define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH #define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d" @@ -403,6 +403,10 @@ void iostat_prefix(struct evlist *evlist, struct iio_root_port *rp = evlist->selected->priv; if (rp) { + /* + * TODO: This is the incorrect format in JSON mode. + * See prepare_timestamp() + */ if (ts) sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s", ts->tv_sec, ts->tv_nsec, diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c deleted file mode 100644 index 424716518b75..000000000000 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ /dev/null @@ -1,213 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <errno.h> -#include <string.h> -#include "../../../util/kvm-stat.h" -#include "../../../util/evsel.h" -#include <asm/svm.h> -#include <asm/vmx.h> -#include <asm/kvm.h> - -define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); -define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); - -static struct kvm_events_ops exit_events = { - .is_begin_event = exit_event_begin, - .is_end_event = exit_event_end, - .decode_key = exit_event_decode_key, - .name = "VM-EXIT" -}; - -const char *vcpu_id_str = "vcpu_id"; -const char *kvm_exit_reason = "exit_reason"; -const char *kvm_entry_trace = "kvm:kvm_entry"; -const char *kvm_exit_trace = "kvm:kvm_exit"; - -/* - * For the mmio events, we treat: - * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry - * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...). - */ -static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample, - struct event_key *key) -{ - key->key = evsel__intval(evsel, sample, "gpa"); - key->info = evsel__intval(evsel, sample, "type"); -} - -#define KVM_TRACE_MMIO_READ_UNSATISFIED 0 -#define KVM_TRACE_MMIO_READ 1 -#define KVM_TRACE_MMIO_WRITE 2 - -static bool mmio_event_begin(struct evsel *evsel, - struct perf_sample *sample, struct event_key *key) -{ - /* MMIO read begin event in kernel. */ - if (kvm_exit_event(evsel)) - return true; - - /* MMIO write begin event in kernel. */ - if (evsel__name_is(evsel, "kvm:kvm_mmio") && - evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { - mmio_event_get_key(evsel, sample, key); - return true; - } - - return false; -} - -static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample, - struct event_key *key) -{ - /* MMIO write end event in kernel. */ - if (kvm_entry_event(evsel)) - return true; - - /* MMIO read end event in kernel.*/ - if (evsel__name_is(evsel, "kvm:kvm_mmio") && - evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { - mmio_event_get_key(evsel, sample, key); - return true; - } - - return false; -} - -static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, - struct event_key *key, - char *decode) -{ - scnprintf(decode, KVM_EVENT_NAME_LEN, "%#lx:%s", - (unsigned long)key->key, - key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); -} - -static struct kvm_events_ops mmio_events = { - .is_begin_event = mmio_event_begin, - .is_end_event = mmio_event_end, - .decode_key = mmio_event_decode_key, - .name = "MMIO Access" -}; - - /* The time of emulation pio access is from kvm_pio to kvm_entry. */ -static void ioport_event_get_key(struct evsel *evsel, - struct perf_sample *sample, - struct event_key *key) -{ - key->key = evsel__intval(evsel, sample, "port"); - key->info = evsel__intval(evsel, sample, "rw"); -} - -static bool ioport_event_begin(struct evsel *evsel, - struct perf_sample *sample, - struct event_key *key) -{ - if (evsel__name_is(evsel, "kvm:kvm_pio")) { - ioport_event_get_key(evsel, sample, key); - return true; - } - - return false; -} - -static bool ioport_event_end(struct evsel *evsel, - struct perf_sample *sample __maybe_unused, - struct event_key *key __maybe_unused) -{ - return kvm_entry_event(evsel); -} - -static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, - struct event_key *key, - char *decode) -{ - scnprintf(decode, KVM_EVENT_NAME_LEN, "%#llx:%s", - (unsigned long long)key->key, - key->info ? "POUT" : "PIN"); -} - -static struct kvm_events_ops ioport_events = { - .is_begin_event = ioport_event_begin, - .is_end_event = ioport_event_end, - .decode_key = ioport_event_decode_key, - .name = "IO Port Access" -}; - - /* The time of emulation msr is from kvm_msr to kvm_entry. */ -static void msr_event_get_key(struct evsel *evsel, - struct perf_sample *sample, - struct event_key *key) -{ - key->key = evsel__intval(evsel, sample, "ecx"); - key->info = evsel__intval(evsel, sample, "write"); -} - -static bool msr_event_begin(struct evsel *evsel, - struct perf_sample *sample, - struct event_key *key) -{ - if (evsel__name_is(evsel, "kvm:kvm_msr")) { - msr_event_get_key(evsel, sample, key); - return true; - } - - return false; -} - -static bool msr_event_end(struct evsel *evsel, - struct perf_sample *sample __maybe_unused, - struct event_key *key __maybe_unused) -{ - return kvm_entry_event(evsel); -} - -static void msr_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, - struct event_key *key, - char *decode) -{ - scnprintf(decode, KVM_EVENT_NAME_LEN, "%#llx:%s", - (unsigned long long)key->key, - key->info ? "W" : "R"); -} - -static struct kvm_events_ops msr_events = { - .is_begin_event = msr_event_begin, - .is_end_event = msr_event_end, - .decode_key = msr_event_decode_key, - .name = "MSR Access" -}; - -const char *kvm_events_tp[] = { - "kvm:kvm_entry", - "kvm:kvm_exit", - "kvm:kvm_mmio", - "kvm:kvm_pio", - "kvm:kvm_msr", - NULL, -}; - -struct kvm_reg_events_ops kvm_reg_events_ops[] = { - { .name = "vmexit", .ops = &exit_events }, - { .name = "mmio", .ops = &mmio_events }, - { .name = "ioport", .ops = &ioport_events }, - { .name = "msr", .ops = &msr_events }, - { NULL, NULL }, -}; - -const char * const kvm_skip_events[] = { - "HLT", - NULL, -}; - -int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid) -{ - if (strstr(cpuid, "Intel")) { - kvm->exit_reasons = vmx_exit_reasons; - kvm->exit_reasons_isa = "VMX"; - } else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) { - kvm->exit_reasons = svm_exit_reasons; - kvm->exit_reasons_isa = "SVM"; - } else - return -ENOTSUP; - - return 0; -} diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index 62df03e91c7e..b38f519020ff 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -26,3 +26,9 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = { E(NULL, NULL, NULL, false, 0), E("mem-ldst", "%s//", NULL, false, 0), }; + +struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = { + E(NULL, NULL, NULL, false, 0), + E(NULL, NULL, NULL, false, 0), + E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0), +}; diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h index f55c8d3b7d59..11e09a256f5b 100644 --- a/tools/perf/arch/x86/util/mem-events.h +++ b/tools/perf/arch/x86/util/mem-events.h @@ -6,5 +6,6 @@ extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX]; extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX]; extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX]; +extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX]; #endif /* _X86_MEM_EVENTS_H */ diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c deleted file mode 100644 index 12fd93f04802..000000000000 --- a/tools/perf/arch/x86/util/perf_regs.c +++ /dev/null @@ -1,330 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <errno.h> -#include <string.h> -#include <regex.h> -#include <linux/kernel.h> -#include <linux/zalloc.h> - -#include "perf_regs.h" -#include "../../../perf-sys.h" -#include "../../../util/perf_regs.h" -#include "../../../util/debug.h" -#include "../../../util/event.h" -#include "../../../util/pmu.h" -#include "../../../util/pmus.h" - -static const struct sample_reg sample_reg_masks[] = { - SMPL_REG(AX, PERF_REG_X86_AX), - SMPL_REG(BX, PERF_REG_X86_BX), - SMPL_REG(CX, PERF_REG_X86_CX), - SMPL_REG(DX, PERF_REG_X86_DX), - SMPL_REG(SI, PERF_REG_X86_SI), - SMPL_REG(DI, PERF_REG_X86_DI), - SMPL_REG(BP, PERF_REG_X86_BP), - SMPL_REG(SP, PERF_REG_X86_SP), - SMPL_REG(IP, PERF_REG_X86_IP), - SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), - SMPL_REG(CS, PERF_REG_X86_CS), - SMPL_REG(SS, PERF_REG_X86_SS), -#ifdef HAVE_ARCH_X86_64_SUPPORT - SMPL_REG(R8, PERF_REG_X86_R8), - SMPL_REG(R9, PERF_REG_X86_R9), - SMPL_REG(R10, PERF_REG_X86_R10), - SMPL_REG(R11, PERF_REG_X86_R11), - SMPL_REG(R12, PERF_REG_X86_R12), - SMPL_REG(R13, PERF_REG_X86_R13), - SMPL_REG(R14, PERF_REG_X86_R14), - SMPL_REG(R15, PERF_REG_X86_R15), -#endif - SMPL_REG2(XMM0, PERF_REG_X86_XMM0), - SMPL_REG2(XMM1, PERF_REG_X86_XMM1), - SMPL_REG2(XMM2, PERF_REG_X86_XMM2), - SMPL_REG2(XMM3, PERF_REG_X86_XMM3), - SMPL_REG2(XMM4, PERF_REG_X86_XMM4), - SMPL_REG2(XMM5, PERF_REG_X86_XMM5), - SMPL_REG2(XMM6, PERF_REG_X86_XMM6), - SMPL_REG2(XMM7, PERF_REG_X86_XMM7), - SMPL_REG2(XMM8, PERF_REG_X86_XMM8), - SMPL_REG2(XMM9, PERF_REG_X86_XMM9), - SMPL_REG2(XMM10, PERF_REG_X86_XMM10), - SMPL_REG2(XMM11, PERF_REG_X86_XMM11), - SMPL_REG2(XMM12, PERF_REG_X86_XMM12), - SMPL_REG2(XMM13, PERF_REG_X86_XMM13), - SMPL_REG2(XMM14, PERF_REG_X86_XMM14), - SMPL_REG2(XMM15, PERF_REG_X86_XMM15), - SMPL_REG_END -}; - -struct sdt_name_reg { - const char *sdt_name; - const char *uprobe_name; -}; -#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} -#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} - -static const struct sdt_name_reg sdt_reg_tbl[] = { - SDT_NAME_REG(eax, ax), - SDT_NAME_REG(rax, ax), - SDT_NAME_REG(al, ax), - SDT_NAME_REG(ah, ax), - SDT_NAME_REG(ebx, bx), - SDT_NAME_REG(rbx, bx), - SDT_NAME_REG(bl, bx), - SDT_NAME_REG(bh, bx), - SDT_NAME_REG(ecx, cx), - SDT_NAME_REG(rcx, cx), - SDT_NAME_REG(cl, cx), - SDT_NAME_REG(ch, cx), - SDT_NAME_REG(edx, dx), - SDT_NAME_REG(rdx, dx), - SDT_NAME_REG(dl, dx), - SDT_NAME_REG(dh, dx), - SDT_NAME_REG(esi, si), - SDT_NAME_REG(rsi, si), - SDT_NAME_REG(sil, si), - SDT_NAME_REG(edi, di), - SDT_NAME_REG(rdi, di), - SDT_NAME_REG(dil, di), - SDT_NAME_REG(ebp, bp), - SDT_NAME_REG(rbp, bp), - SDT_NAME_REG(bpl, bp), - SDT_NAME_REG(rsp, sp), - SDT_NAME_REG(esp, sp), - SDT_NAME_REG(spl, sp), - - /* rNN registers */ - SDT_NAME_REG(r8b, r8), - SDT_NAME_REG(r8w, r8), - SDT_NAME_REG(r8d, r8), - SDT_NAME_REG(r9b, r9), - SDT_NAME_REG(r9w, r9), - SDT_NAME_REG(r9d, r9), - SDT_NAME_REG(r10b, r10), - SDT_NAME_REG(r10w, r10), - SDT_NAME_REG(r10d, r10), - SDT_NAME_REG(r11b, r11), - SDT_NAME_REG(r11w, r11), - SDT_NAME_REG(r11d, r11), - SDT_NAME_REG(r12b, r12), - SDT_NAME_REG(r12w, r12), - SDT_NAME_REG(r12d, r12), - SDT_NAME_REG(r13b, r13), - SDT_NAME_REG(r13w, r13), - SDT_NAME_REG(r13d, r13), - SDT_NAME_REG(r14b, r14), - SDT_NAME_REG(r14w, r14), - SDT_NAME_REG(r14d, r14), - SDT_NAME_REG(r15b, r15), - SDT_NAME_REG(r15w, r15), - SDT_NAME_REG(r15d, r15), - SDT_NAME_REG_END, -}; - -/* - * Perf only supports OP which is in +/-NUM(REG) form. - * Here plus-minus sign, NUM and parenthesis are optional, - * only REG is mandatory. - * - * SDT events also supports indirect addressing mode with a - * symbol as offset, scaled mode and constants in OP. But - * perf does not support them yet. Below are few examples. - * - * OP with scaled mode: - * (%rax,%rsi,8) - * 10(%ras,%rsi,8) - * - * OP with indirect addressing mode: - * check_action(%rip) - * mp_+52(%rip) - * 44+mp_(%rip) - * - * OP with constant values: - * $0 - * $123 - * $-1 - */ -#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$" - -static regex_t sdt_op_regex; - -static int sdt_init_op_regex(void) -{ - static int initialized; - int ret = 0; - - if (initialized) - return 0; - - ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED); - if (ret < 0) { - pr_debug4("Regex compilation error.\n"); - return ret; - } - - initialized = 1; - return 0; -} - -/* - * Max x86 register name length is 5(ex: %r15d). So, 6th char - * should always contain NULL. This helps to find register name - * length using strlen, instead of maintaining one more variable. - */ -#define SDT_REG_NAME_SIZE 6 - -/* - * The uprobe parser does not support all gas register names; - * so, we have to replace them (ex. for x86_64: %rax -> %ax). - * Note: If register does not require renaming, just copy - * paste as it is, but don't leave it empty. - */ -static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg) -{ - int i = 0; - - for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) { - if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) { - strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name); - return; - } - } - - strncpy(uprobe_reg, sdt_reg, sdt_len); -} - -int arch_sdt_arg_parse_op(char *old_op, char **new_op) -{ - char new_reg[SDT_REG_NAME_SIZE] = {0}; - int new_len = 0, ret; - /* - * rm[0]: +/-NUM(REG) - * rm[1]: +/- - * rm[2]: NUM - * rm[3]: ( - * rm[4]: REG - * rm[5]: ) - */ - regmatch_t rm[6]; - /* - * Max prefix length is 2 as it may contains sign(+/-) - * and displacement 0 (Both sign and displacement 0 are - * optional so it may be empty). Use one more character - * to hold last NULL so that strlen can be used to find - * prefix length, instead of maintaining one more variable. - */ - char prefix[3] = {0}; - - ret = sdt_init_op_regex(); - if (ret < 0) - return ret; - - /* - * If unsupported OR does not match with regex OR - * register name too long, skip it. - */ - if (strchr(old_op, ',') || strchr(old_op, '$') || - regexec(&sdt_op_regex, old_op, 6, rm, 0) || - rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) { - pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); - return SDT_ARG_SKIP; - } - - /* - * Prepare prefix. - * If SDT OP has parenthesis but does not provide - * displacement, add 0 for displacement. - * SDT Uprobe Prefix - * ----------------------------- - * +24(%rdi) +24(%di) + - * 24(%rdi) +24(%di) + - * %rdi %di - * (%rdi) +0(%di) +0 - * -80(%rbx) -80(%bx) - - */ - if (rm[3].rm_so != rm[3].rm_eo) { - if (rm[1].rm_so != rm[1].rm_eo) - prefix[0] = *(old_op + rm[1].rm_so); - else if (rm[2].rm_so != rm[2].rm_eo) - prefix[0] = '+'; - else - scnprintf(prefix, sizeof(prefix), "+0"); - } - - /* Rename register */ - sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so, - new_reg); - - /* Prepare final OP which should be valid for uprobe_events */ - new_len = strlen(prefix) + - (rm[2].rm_eo - rm[2].rm_so) + - (rm[3].rm_eo - rm[3].rm_so) + - strlen(new_reg) + - (rm[5].rm_eo - rm[5].rm_so) + - 1; /* NULL */ - - *new_op = zalloc(new_len); - if (!*new_op) - return -ENOMEM; - - scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s", - strlen(prefix), prefix, - (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, - (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so, - strlen(new_reg), new_reg, - (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so); - - return SDT_ARG_VALID; -} - -const struct sample_reg *arch__sample_reg_masks(void) -{ - return sample_reg_masks; -} - -uint64_t arch__intr_reg_mask(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .sample_type = PERF_SAMPLE_REGS_INTR, - .sample_regs_intr = PERF_REG_EXTENDED_MASK, - .precise_ip = 1, - .disabled = 1, - .exclude_kernel = 1, - }; - int fd; - /* - * In an unnamed union, init it here to build on older gcc versions - */ - attr.sample_period = 1; - - if (perf_pmus__num_core_pmus() > 1) { - struct perf_pmu *pmu = NULL; - __u64 type = PERF_TYPE_RAW; - - /* - * The same register set is supported among different hybrid PMUs. - * Only check the first available one. - */ - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - type = pmu->type; - break; - } - attr.config |= type << PERF_PMU_TYPE_SHIFT; - } - - event_attr_init(&attr); - - fd = sys_perf_event_open(&attr, 0, -1, -1, 0); - if (fd != -1) { - close(fd); - return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK); - } - - return PERF_REGS_MASK; -} - -uint64_t arch__user_reg_mask(void) -{ - return PERF_REGS_MASK; -} diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index e0060dac2a9f..7c9d238922a6 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -5,9 +5,11 @@ #include <dirent.h> #include <fcntl.h> #include <linux/stddef.h> +#include <linux/string.h> #include <linux/perf_event.h> -#include <linux/zalloc.h> #include <api/fs/fs.h> +#include <api/io_dir.h> +#include <internal/cpumap.h> #include <errno.h> #include "../../../util/intel-pt.h" @@ -16,11 +18,257 @@ #include "../../../util/fncache.h" #include "../../../util/pmus.h" #include "mem-events.h" +#include "util/debug.h" #include "util/env.h" +#include "util/header.h" -void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) +static bool x86__is_intel_graniterapids(void) { -#ifdef HAVE_AUXTRACE_SUPPORT + static bool checked_if_graniterapids; + static bool is_graniterapids; + + if (!checked_if_graniterapids) { + const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]"; + char *cpuid = get_cpuid_str((struct perf_cpu){0}); + + is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0; + free(cpuid); + checked_if_graniterapids = true; + } + return is_graniterapids; +} + +static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path) +{ + struct perf_cpu_map *cpus; + char *buf = NULL; + size_t buf_len; + + if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0) + return NULL; + + cpus = perf_cpu_map__new(buf); + free(buf); + return cpus; +} + +static int snc_nodes_per_l3_cache(void) +{ + static bool checked_snc; + static int snc_nodes; + + if (!checked_snc) { + struct perf_cpu_map *node_cpus = + read_sysfs_cpu_map("devices/system/node/node0/cpulist"); + struct perf_cpu_map *cache_cpus = + read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list"); + + snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus); + perf_cpu_map__put(cache_cpus); + perf_cpu_map__put(node_cpus); + checked_snc = true; + } + return snc_nodes; +} + +static int num_chas(void) +{ + static bool checked_chas; + static int num_chas; + + if (!checked_chas) { + int fd = perf_pmu__event_source_devices_fd(); + struct io_dir dir; + struct io_dirent64 *dent; + + if (fd < 0) + return -1; + + io_dir__init(&dir, fd); + + while ((dent = io_dir__readdir(&dir)) != NULL) { + /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */ + if (strstarts(dent->d_name, "uncore_cha_")) + num_chas++; + } + close(fd); + checked_chas = true; + } + return num_chas; +} + +#define MAX_SNCS 6 + +static int uncore_cha_snc(struct perf_pmu *pmu) +{ + // CHA SNC numbers are ordered correspond to the CHAs number. + unsigned int cha_num; + int num_cha, chas_per_node, cha_snc; + int snc_nodes = snc_nodes_per_l3_cache(); + + if (snc_nodes <= 1) + return 0; + + num_cha = num_chas(); + if (num_cha <= 0) { + pr_warning("Unexpected: no CHAs found\n"); + return 0; + } + + /* Compute SNC for PMU. */ + if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) { + pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name); + return 0; + } + chas_per_node = num_cha / snc_nodes; + cha_snc = cha_num / chas_per_node; + + /* Range check cha_snc. for unexpected out of bounds. */ + return cha_snc >= MAX_SNCS ? 0 : cha_snc; +} + +static int uncore_imc_snc(struct perf_pmu *pmu) +{ + // Compute the IMC SNC using lookup tables. + unsigned int imc_num; + int snc_nodes = snc_nodes_per_l3_cache(); + const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0}; + const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2}; + const u8 *snc_map; + size_t snc_map_len; + + switch (snc_nodes) { + case 2: + snc_map = snc2_map; + snc_map_len = ARRAY_SIZE(snc2_map); + break; + case 3: + snc_map = snc3_map; + snc_map_len = ARRAY_SIZE(snc3_map); + break; + default: + /* Error or no lookup support for SNC with >3 nodes. */ + return 0; + } + + /* Compute SNC for PMU. */ + if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) { + pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name); + return 0; + } + if (imc_num >= snc_map_len) { + pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes); + return 0; + } + return snc_map[imc_num]; +} + +static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc) +{ + static bool checked_cpu_adjust[MAX_SNCS]; + static int cpu_adjust[MAX_SNCS]; + struct perf_cpu_map *node_cpus; + char node_path[] = "devices/system/node/node0/cpulist"; + + /* Was adjust already computed? */ + if (checked_cpu_adjust[pmu_snc]) + return cpu_adjust[pmu_snc]; + + /* SNC0 doesn't need an adjust. */ + if (pmu_snc == 0) { + cpu_adjust[0] = 0; + checked_cpu_adjust[0] = true; + return 0; + } + + /* + * Use NUMA topology to compute first CPU of the NUMA node, we want to + * adjust CPU 0 to be this and similarly for other CPUs if there is >1 + * socket. + */ + assert(pmu_snc >= 0 && pmu_snc <= 9); + node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>. + node_cpus = read_sysfs_cpu_map(node_path); + cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu; + if (cpu_adjust[pmu_snc] < 0) { + pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path); + cpu_adjust[pmu_snc] = 0; + } else { + checked_cpu_adjust[pmu_snc] = true; + } + perf_cpu_map__put(node_cpus); + return cpu_adjust[pmu_snc]; +} + +static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha) +{ + // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the + // topology. For example, a two socket graniterapids machine may be set + // up with 3-way SNC meaning there are 6 NUMA nodes that should be + // displayed with --per-node. The cpumask of the CHA and IMC PMUs + // reflects per-socket information meaning, for example, uncore_cha_60 + // on a two socket graniterapids machine with 120 cores per socket will + // have a cpumask of "0,120". This cpumask needs adjusting to "40,160" + // to reflect that uncore_cha_60 is used for the 2nd SNC of each + // socket. Without the adjustment events on uncore_cha_60 will appear in + // node 0 and node 3 (in our example 2 socket 3-way set up), but with + // the adjustment they will appear in node 1 and node 4. The number of + // CHAs is typically larger than the number of cores. The CHA numbers + // are assumed to split evenly and inorder wrt core numbers. There are + // fewer memory IMC PMUs than cores and mapping is handled using lookup + // tables. + static struct perf_cpu_map *cha_adjusted[MAX_SNCS]; + static struct perf_cpu_map *imc_adjusted[MAX_SNCS]; + struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted; + unsigned int idx; + int pmu_snc, cpu_adjust; + struct perf_cpu cpu; + bool alloc; + + // Cpus from the kernel holds first CPU of each socket. e.g. 0,120. + if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) { + pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name); + return; + } + + pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu); + if (pmu_snc == 0) { + // No adjustment necessary for the first SNC. + return; + } + + alloc = adjusted[pmu_snc] == NULL; + if (alloc) { + // Hold onto the perf_cpu_map globally to avoid recomputation. + cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc); + adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus)); + if (!adjusted[pmu_snc]) + return; + } + + perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) { + // Compute the new cpu map values or if not allocating, assert + // that they match expectations. asserts will be removed to + // avoid overhead in NDEBUG builds. + if (alloc) { + RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust; + } else if (idx == 0) { + cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu; + assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust); + } else { + assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu == + cpu.cpu + cpu_adjust); + } + } + + perf_cpu_map__put(pmu->cpus); + pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]); +} + +void perf_pmu__arch_init(struct perf_pmu *pmu) +{ + struct perf_pmu_caps *ldlat_cap; + if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) { pmu->auxtrace = true; pmu->selectable = true; @@ -30,15 +278,33 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) pmu->auxtrace = true; pmu->selectable = true; } -#endif if (x86__is_amd_cpu()) { - if (!strcmp(pmu->name, "ibs_op")) - pmu->mem_events = perf_mem_events_amd; - } else if (pmu->is_core) { - if (perf_pmu__have_event(pmu, "mem-loads-aux")) - pmu->mem_events = perf_mem_events_intel_aux; - else - pmu->mem_events = perf_mem_events_intel; + if (strcmp(pmu->name, "ibs_op")) + return; + + pmu->mem_events = perf_mem_events_amd; + + if (!perf_pmu__caps_parse(pmu)) + return; + + ldlat_cap = perf_pmu__get_cap(pmu, "ldlat"); + if (!ldlat_cap || strcmp(ldlat_cap->value, "1")) + return; + + perf_mem_events__loads_ldlat = 0; + pmu->mem_events = perf_mem_events_amd_ldlat; + } else { + if (pmu->is_core) { + if (perf_pmu__have_event(pmu, "mem-loads-aux")) + pmu->mem_events = perf_mem_events_intel_aux; + else + pmu->mem_events = perf_mem_events_intel; + } else if (x86__is_intel_graniterapids()) { + if (strstarts(pmu->name, "uncore_cha_")) + gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true); + else if (strstarts(pmu->name, "uncore_imc_")) + gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false); + } } } diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index f63747d0abdf..bafd285119d7 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "api/fs/fs.h" -#include "util/evsel.h" +#include <errno.h> #include "util/evlist.h" #include "util/pmu.h" #include "util/pmus.h" @@ -8,6 +7,9 @@ #include "topdown.h" #include "evsel.h" +// cmask=0, inv=0, pc=0, edge=0, umask=4, event=0 +#define TOPDOWN_SLOTS 0x0400 + /* Check whether there is a PMU which supports the perf metrics. */ bool topdown_sys_has_perf_metrics(void) { @@ -32,31 +34,19 @@ bool topdown_sys_has_perf_metrics(void) return has_perf_metrics; } -#define TOPDOWN_SLOTS 0x0400 bool arch_is_topdown_slots(const struct evsel *evsel) { - if (evsel->core.attr.config == TOPDOWN_SLOTS) - return true; - - return false; + return evsel->core.attr.type == PERF_TYPE_RAW && + evsel->core.attr.config == TOPDOWN_SLOTS && + evsel->core.attr.config1 == 0; } bool arch_is_topdown_metrics(const struct evsel *evsel) { - int config = evsel->core.attr.config; - const char *name_from_config; - struct perf_pmu *pmu; - - /* All topdown events have an event code of 0. */ - if ((config & 0xFF) != 0) - return false; - - pmu = evsel__find_pmu(evsel); - if (!pmu || !pmu->is_core) - return false; - - name_from_config = perf_pmu__name_from_config(pmu, config); - return name_from_config && strcasestr(name_from_config, "topdown"); + // cmask=0, inv=0, pc=0, edge=0, umask=0x80-0x87, event=0 + return evsel->core.attr.type == PERF_TYPE_RAW && + (evsel->core.attr.config & 0xFFFFF8FF) == 0x8000 && + evsel->core.attr.config1 == 0; } /* @@ -81,10 +71,38 @@ bool arch_topdown_sample_read(struct evsel *leader) */ evlist__for_each_entry(leader->evlist, evsel) { if (evsel->core.leader != leader->core.leader) - return false; + continue; if (evsel != leader && arch_is_topdown_metrics(evsel)) return true; } return false; } + +/* + * Make a copy of the topdown metric event metric_event with the given index but + * change its configuration to be a topdown slots event. Copying from + * metric_event ensures modifiers are the same. + */ +int topdown_insert_slots_event(struct list_head *list, int idx, struct evsel *metric_event) +{ + struct evsel *evsel = evsel__new_idx(&metric_event->core.attr, idx); + + if (!evsel) + return -ENOMEM; + + evsel->core.attr.config = TOPDOWN_SLOTS; + evsel->core.cpus = perf_cpu_map__get(metric_event->core.cpus); + evsel->core.pmu_cpus = perf_cpu_map__get(metric_event->core.pmu_cpus); + evsel->core.is_pmu_core = true; + evsel->pmu = metric_event->pmu; + evsel->name = strdup("slots"); + evsel->precise_max = metric_event->precise_max; + evsel->sample_read = metric_event->sample_read; + evsel->weak_group = metric_event->weak_group; + evsel->bpf_counter = metric_event->bpf_counter; + evsel->retire_lat = metric_event->retire_lat; + evsel__set_leader(evsel, evsel__leader(metric_event)); + list_add_tail(&evsel->core.node, list); + return 0; +} diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h index 1bae9b1822d7..69035565e649 100644 --- a/tools/perf/arch/x86/util/topdown.h +++ b/tools/perf/arch/x86/util/topdown.h @@ -2,8 +2,14 @@ #ifndef _TOPDOWN_H #define _TOPDOWN_H 1 +#include <stdbool.h> + +struct evsel; +struct list_head; + bool topdown_sys_has_perf_metrics(void); bool arch_is_topdown_slots(const struct evsel *evsel); bool arch_is_topdown_metrics(const struct evsel *evsel); +int topdown_insert_slots_event(struct list_head *list, int idx, struct evsel *metric_event); #endif diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c deleted file mode 100644 index edb77e20e083..000000000000 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <elfutils/libdwfl.h> -#include "perf_regs.h" -#include "../../../util/unwind-libdw.h" -#include "../../../util/perf_regs.h" -#include "util/sample.h" - -bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) -{ - struct unwind_info *ui = arg; - struct regs_dump *user_regs = &ui->sample->user_regs; - Dwarf_Word dwarf_regs[17]; - unsigned nregs; - -#define REG(r) ({ \ - Dwarf_Word val = 0; \ - perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \ - val; \ -}) - - if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) { - dwarf_regs[0] = REG(AX); - dwarf_regs[1] = REG(CX); - dwarf_regs[2] = REG(DX); - dwarf_regs[3] = REG(BX); - dwarf_regs[4] = REG(SP); - dwarf_regs[5] = REG(BP); - dwarf_regs[6] = REG(SI); - dwarf_regs[7] = REG(DI); - dwarf_regs[8] = REG(IP); - nregs = 9; - } else { - dwarf_regs[0] = REG(AX); - dwarf_regs[1] = REG(DX); - dwarf_regs[2] = REG(CX); - dwarf_regs[3] = REG(BX); - dwarf_regs[4] = REG(SI); - dwarf_regs[5] = REG(DI); - dwarf_regs[6] = REG(BP); - dwarf_regs[7] = REG(SP); - dwarf_regs[8] = REG(R8); - dwarf_regs[9] = REG(R9); - dwarf_regs[10] = REG(R10); - dwarf_regs[11] = REG(R11); - dwarf_regs[12] = REG(R12); - dwarf_regs[13] = REG(R13); - dwarf_regs[14] = REG(R14); - dwarf_regs[15] = REG(R15); - dwarf_regs[16] = REG(IP); - nregs = 17; - } - - return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs); -} |
