diff options
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl/intel_aet.c')
| -rw-r--r-- | arch/x86/kernel/cpu/resctrl/intel_aet.c | 409 |
1 files changed, 409 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c new file mode 100644 index 000000000000..89b8b619d5d5 --- /dev/null +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Resource Director Technology(RDT) + * - Intel Application Energy Telemetry + * + * Copyright (C) 2025 Intel Corporation + * + * Author: + * Tony Luck <tony.luck@intel.com> + */ + +#define pr_fmt(fmt) "resctrl: " fmt + +#include <linux/bits.h> +#include <linux/compiler_types.h> +#include <linux/container_of.h> +#include <linux/cpumask.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp_types.h> +#include <linux/init.h> +#include <linux/intel_pmt_features.h> +#include <linux/intel_vsec.h> +#include <linux/io.h> +#include <linux/minmax.h> +#include <linux/printk.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/resctrl.h> +#include <linux/resctrl_types.h> +#include <linux/slab.h> +#include <linux/stddef.h> +#include <linux/topology.h> +#include <linux/types.h> + +#include "internal.h" + +/** + * struct pmt_event - Telemetry event. + * @id: Resctrl event id. + * @idx: Counter index within each per-RMID block of counters. + * @bin_bits: Zero for integer valued events, else number bits in fraction + * part of fixed-point. + */ +struct pmt_event { + enum resctrl_event_id id; + unsigned int idx; + unsigned int bin_bits; +}; + +#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits } + +/** + * struct event_group - Events with the same feature type ("energy" or "perf") and GUID. + * @pfname: PMT feature name ("energy" or "perf") of this event group. + * Used by boot rdt= option. + * @pfg: Points to the aggregated telemetry space information + * returned by the intel_pmt_get_regions_by_feature() + * call to the INTEL_PMT_TELEMETRY driver that contains + * data for all telemetry regions of type @pfname. + * Valid if the system supports the event group, + * NULL otherwise. + * @force_off: True when "rdt" command line or architecture code disables + * this event group due to insufficient RMIDs. + * @force_on: True when "rdt" command line overrides disable of this + * event group. + * @guid: Unique number per XML description file. + * @num_rmid: Number of RMIDs supported by this group. May be + * adjusted downwards if enumeration from + * intel_pmt_get_regions_by_feature() indicates fewer + * RMIDs can be tracked simultaneously. + * @mmio_size: Number of bytes of MMIO registers for this group. + * @num_events: Number of events in this group. + * @evts: Array of event descriptors. + */ +struct event_group { + /* Data fields for additional structures to manage this group. */ + const char *pfname; + struct pmt_feature_group *pfg; + bool force_off, force_on; + + /* Remaining fields initialized from XML file. */ + u32 guid; + u32 num_rmid; + size_t mmio_size; + unsigned int num_events; + struct pmt_event evts[] __counted_by(num_events); +}; + +#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \ + (((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64)) + +/* + * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml + */ +static struct event_group energy_0x26696143 = { + .pfname = "energy", + .guid = 0x26696143, + .num_rmid = 576, + .mmio_size = XML_MMIO_SIZE(576, 2, 3), + .num_events = 2, + .evts = { + EVT(PMT_EVENT_ENERGY, 0, 18), + EVT(PMT_EVENT_ACTIVITY, 1, 18), + } +}; + +/* + * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml + */ +static struct event_group perf_0x26557651 = { + .pfname = "perf", + .guid = 0x26557651, + .num_rmid = 576, + .mmio_size = XML_MMIO_SIZE(576, 7, 3), + .num_events = 7, + .evts = { + EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0), + EVT(PMT_EVENT_C1_RES, 1, 0), + EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0), + EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0), + EVT(PMT_EVENT_AUTO_C6_RES, 4, 0), + EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0), + EVT(PMT_EVENT_UOPS_RETIRED, 6, 0), + } +}; + +static struct event_group *known_event_groups[] = { + &energy_0x26696143, + &perf_0x26557651, +}; + +#define for_each_event_group(_peg) \ + for (_peg = known_event_groups; \ + _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \ + _peg++) + +bool intel_handle_aet_option(bool force_off, char *tok) +{ + struct event_group **peg; + bool ret = false; + u32 guid = 0; + char *name; + + if (!tok) + return false; + + name = strsep(&tok, ":"); + if (tok && kstrtou32(tok, 16, &guid)) + return false; + + for_each_event_group(peg) { + if (strcmp(name, (*peg)->pfname)) + continue; + if (guid && (*peg)->guid != guid) + continue; + if (force_off) + (*peg)->force_off = true; + else + (*peg)->force_on = true; + ret = true; + } + + return ret; +} + +static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e) +{ + if (tr->guid != e->guid) + return true; + if (tr->plat_info.package_id >= topology_max_packages()) { + pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id, + tr->guid); + return true; + } + if (tr->size != e->mmio_size) { + pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n", + tr->size, e->guid, e->mmio_size); + return true; + } + + return false; +} + +static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p) +{ + bool usable_regions = false; + + for (int i = 0; i < p->count; i++) { + if (skip_telem_region(&p->regions[i], e)) { + /* + * Clear the address field of regions that did not pass the checks in + * skip_telem_region() so they will not be used by intel_aet_read_event(). + * This is safe to do because intel_pmt_get_regions_by_feature() allocates + * a new pmt_feature_group structure to return to each caller and only makes + * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group() + * returns the structure. + */ + p->regions[i].addr = NULL; + + continue; + } + usable_regions = true; + } + + return usable_regions; +} + +static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p) +{ + struct telemetry_region *tr; + + for (int i = 0; i < p->count; i++) { + if (!p->regions[i].addr) + continue; + tr = &p->regions[i]; + if (tr->num_rmids < e->num_rmid) { + e->force_off = true; + return false; + } + } + + return true; +} + +static bool enable_events(struct event_group *e, struct pmt_feature_group *p) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; + int skipped_events = 0; + + if (e->force_off) + return false; + + if (!group_has_usable_regions(e, p)) + return false; + + /* + * Only enable event group with insufficient RMIDs if the user requested + * it from the kernel command line. + */ + if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) { + pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n", + r->name, e->pfname, e->guid); + return false; + } + + for (int i = 0; i < p->count; i++) { + if (!p->regions[i].addr) + continue; + /* + * e->num_rmid only adjusted lower if user (via rdt= kernel + * parameter) forces an event group with insufficient RMID + * to be enabled. + */ + e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids); + } + + for (int j = 0; j < e->num_events; j++) { + if (!resctrl_enable_mon_event(e->evts[j].id, true, + e->evts[j].bin_bits, &e->evts[j])) + skipped_events++; + } + if (e->num_events == skipped_events) { + pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid); + return false; + } + + if (r->mon.num_rmid) + r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid); + else + r->mon.num_rmid = e->num_rmid; + + if (skipped_events) + pr_info("%s %s:0x%x monitoring detected (skipped %d events)\n", r->name, + e->pfname, e->guid, skipped_events); + else + pr_info("%s %s:0x%x monitoring detected\n", r->name, e->pfname, e->guid); + + return true; +} + +static enum pmt_feature_id lookup_pfid(const char *pfname) +{ + if (!strcmp(pfname, "energy")) + return FEATURE_PER_RMID_ENERGY_TELEM; + else if (!strcmp(pfname, "perf")) + return FEATURE_PER_RMID_PERF_TELEM; + + pr_warn("Unknown PMT feature name '%s'\n", pfname); + + return FEATURE_INVALID; +} + +/* + * Request a copy of struct pmt_feature_group for each event group. If there is + * one, the returned structure has an array of telemetry_region structures, + * each element of the array describes one telemetry aggregator. The + * telemetry aggregators may have different GUIDs so obtain duplicate struct + * pmt_feature_group for event groups with same feature type but different + * GUID. Post-processing ensures an event group can only use the telemetry + * aggregators that match its GUID. An event group keeps a pointer to its + * struct pmt_feature_group to indicate that its events are successfully + * enabled. + */ +bool intel_aet_get_events(void) +{ + struct pmt_feature_group *p; + enum pmt_feature_id pfid; + struct event_group **peg; + bool ret = false; + + for_each_event_group(peg) { + pfid = lookup_pfid((*peg)->pfname); + p = intel_pmt_get_regions_by_feature(pfid); + if (IS_ERR_OR_NULL(p)) + continue; + if (enable_events(*peg, p)) { + (*peg)->pfg = p; + ret = true; + } else { + intel_pmt_put_feature_group(p); + } + } + + return ret; +} + +void __exit intel_aet_exit(void) +{ + struct event_group **peg; + + for_each_event_group(peg) { + if ((*peg)->pfg) { + intel_pmt_put_feature_group((*peg)->pfg); + (*peg)->pfg = NULL; + } + } +} + +#define DATA_VALID BIT_ULL(63) +#define DATA_BITS GENMASK_ULL(62, 0) + +/* + * Read counter for an event on a domain (summing all aggregators on the + * domain). If an aggregator hasn't received any data for a specific RMID, + * the MMIO read indicates that data is not valid. Return success if at + * least one aggregator has valid data. + */ +int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val) +{ + struct pmt_event *pevt = arch_priv; + struct event_group *e; + bool valid = false; + u64 total = 0; + u64 evtcount; + void *pevt0; + u32 idx; + + pevt0 = pevt - pevt->idx; + e = container_of(pevt0, struct event_group, evts); + idx = rmid * e->num_events; + idx += pevt->idx; + + if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) { + pr_warn_once("MMIO index %u out of range\n", idx); + return -EIO; + } + + for (int i = 0; i < e->pfg->count; i++) { + if (!e->pfg->regions[i].addr) + continue; + if (e->pfg->regions[i].plat_info.package_id != domid) + continue; + evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64)); + if (!(evtcount & DATA_VALID)) + continue; + total += evtcount & DATA_BITS; + valid = true; + } + + if (valid) + *val = total; + + return valid ? 0 : -EINVAL; +} + +void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r, + struct list_head *add_pos) +{ + struct rdt_perf_pkg_mon_domain *d; + int err; + + d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); + if (!d) + return; + + d->hdr.id = id; + d->hdr.type = RESCTRL_MON_DOMAIN; + d->hdr.rid = RDT_RESOURCE_PERF_PKG; + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + list_add_tail_rcu(&d->hdr.list, add_pos); + + err = resctrl_online_mon_domain(r, &d->hdr); + if (err) { + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + kfree(d); + } +} |
