summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/resctrl
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl')
-rw-r--r--arch/x86/kernel/cpu/resctrl/Makefile5
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c174
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c159
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h210
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c201
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c71
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c313
7 files changed, 619 insertions, 514 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 4a06c37b9cf1..0c13b0befd8a 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
-obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
+obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index b681c2e07dbf..cf29681d01e0 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -44,12 +44,6 @@ static DEFINE_MUTEX(domain_list_lock);
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
/*
- * Used to store the max resource name width and max resource data width
- * to display the schemata in a tabular format
- */
-int max_name_width, max_data_width;
-
-/*
* Global boolean for rdt_alloc which is true if any
* resource allocation is enabled.
*/
@@ -62,7 +56,7 @@ static void mba_wrmsr_amd(struct msr_param *m);
#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
-struct rdt_hw_resource rdt_resources_all[] = {
+struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
[RDT_RESOURCE_L3] =
{
.r_resctrl = {
@@ -72,9 +66,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.mon_scope = RESCTRL_L3_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3),
.mon_domains = mon_domain_init(RDT_RESOURCE_L3),
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
+ .schema_fmt = RESCTRL_SCHEMA_BITMAP,
},
.msr_base = MSR_IA32_L3_CBM_BASE,
.msr_update = cat_wrmsr,
@@ -86,9 +78,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.name = "L2",
.ctrl_scope = RESCTRL_L2_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2),
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
+ .schema_fmt = RESCTRL_SCHEMA_BITMAP,
},
.msr_base = MSR_IA32_L2_CBM_BASE,
.msr_update = cat_wrmsr,
@@ -100,9 +90,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.name = "MB",
.ctrl_scope = RESCTRL_L3_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA),
- .parse_ctrlval = parse_bw,
- .format_str = "%d=%*u",
- .fflags = RFTYPE_RES_MB,
+ .schema_fmt = RESCTRL_SCHEMA_RANGE,
},
},
[RDT_RESOURCE_SMBA] =
@@ -112,9 +100,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.name = "SMBA",
.ctrl_scope = RESCTRL_L3_CACHE,
.ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA),
- .parse_ctrlval = parse_bw,
- .format_str = "%d=%*u",
- .fflags = RFTYPE_RES_MB,
+ .schema_fmt = RESCTRL_SCHEMA_RANGE,
},
},
};
@@ -127,6 +113,14 @@ u32 resctrl_arch_system_num_rmid_idx(void)
return r->num_rmid;
}
+struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
+{
+ if (l >= RDT_NUM_RESOURCES)
+ return NULL;
+
+ return &rdt_resources_all[l].r_resctrl;
+}
+
/*
* cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
* as they do not have CPUID enumeration support for Cache allocation.
@@ -161,7 +155,6 @@ static inline void cache_alloc_hsw_probe(void)
return;
hw_res->num_closid = 4;
- r->default_ctrl = max_cbm;
r->cache.cbm_len = 20;
r->cache.shareable_bits = 0xc0000;
r->cache.min_cbm_bits = 2;
@@ -174,7 +167,7 @@ static inline void cache_alloc_hsw_probe(void)
bool is_mba_sc(struct rdt_resource *r)
{
if (!r)
- return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
+ r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
/*
* The software controller support is only applicable to MBA resource.
@@ -217,7 +210,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r)
cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
hw_res->num_closid = edx.split.cos_max + 1;
max_delay = eax.split.max_delay + 1;
- r->default_ctrl = MAX_MBA_BW;
+ r->membw.max_bw = MAX_MBA_BW;
r->membw.arch_needs_linear = true;
if (ecx & MBA_IS_LINEAR) {
r->membw.delay_linear = true;
@@ -228,13 +221,11 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r)
return false;
r->membw.arch_needs_linear = false;
}
- r->data_width = 3;
if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
else
r->membw.throttle_mode = THREAD_THROTTLE_MAX;
- thread_throttle_mode_init();
r->alloc_capable = true;
@@ -254,7 +245,7 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
hw_res->num_closid = edx + 1;
- r->default_ctrl = 1 << eax;
+ r->membw.max_bw = 1 << eax;
/* AMD does not use delay */
r->membw.delay_linear = false;
@@ -267,8 +258,6 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
r->membw.min_bw = 0;
r->membw.bw_gran = 1;
- /* Max value is 2048, Data width should be 4 in decimal */
- r->data_width = 4;
r->alloc_capable = true;
@@ -281,14 +270,13 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
union cpuid_0x10_1_eax eax;
union cpuid_0x10_x_ecx ecx;
union cpuid_0x10_x_edx edx;
- u32 ebx;
+ u32 ebx, default_ctrl;
cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
hw_res->num_closid = edx.split.cos_max + 1;
r->cache.cbm_len = eax.split.cbm_len + 1;
- r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
- r->cache.shareable_bits = ebx & r->default_ctrl;
- r->data_width = (r->cache.cbm_len + 3) / 4;
+ default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
+ r->cache.shareable_bits = ebx & default_ctrl;
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
r->alloc_capable = true;
@@ -335,7 +323,7 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
return MAX_MBA_BW - bw;
pr_warn_once("Non Linear delay-bw map not supported but queried\n");
- return r->default_ctrl;
+ return MAX_MBA_BW;
}
static void mba_wrmsr_intel(struct msr_param *m)
@@ -359,36 +347,6 @@ static void cat_wrmsr(struct msr_param *m)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
-struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
-{
- struct rdt_ctrl_domain *d;
-
- lockdep_assert_cpus_held();
-
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- /* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
- return d;
- }
-
- return NULL;
-}
-
-struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
-{
- struct rdt_mon_domain *d;
-
- lockdep_assert_cpus_held();
-
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- /* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
- return d;
- }
-
- return NULL;
-}
-
u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
return resctrl_to_arch_res(r)->num_closid;
@@ -403,36 +361,6 @@ void rdt_ctrl_update(void *arg)
hw_res->msr_update(m);
}
-/*
- * rdt_find_domain - Search for a domain id in a resource domain list.
- *
- * Search the domain list to find the domain id. If the domain id is
- * found, return the domain. NULL otherwise. If the domain id is not
- * found (and NULL returned) then the first domain with id bigger than
- * the input id can be returned to the caller via @pos.
- */
-struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
- struct list_head **pos)
-{
- struct rdt_domain_hdr *d;
- struct list_head *l;
-
- list_for_each(l, h) {
- d = list_entry(l, struct rdt_domain_hdr, list);
- /* When id is found, return its domain. */
- if (id == d->id)
- return d;
- /* Stop searching when finding id's position in sorted list. */
- if (id < d->id)
- break;
- }
-
- if (pos)
- *pos = l;
-
- return NULL;
-}
-
static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
@@ -444,7 +372,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
* For Memory Allocation: Set b/w requested to 100%
*/
for (i = 0; i < hw_res->num_closid; i++, dc++)
- *dc = r->default_ctrl;
+ *dc = resctrl_get_default_ctrl(r);
}
static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
@@ -492,13 +420,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
size_t tsize;
- if (is_mbm_total_enabled()) {
+ if (resctrl_arch_is_mbm_total_enabled()) {
tsize = sizeof(*hw_dom->arch_mbm_total);
hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
if (!hw_dom->arch_mbm_total)
return -ENOMEM;
}
- if (is_mbm_local_enabled()) {
+ if (resctrl_arch_is_mbm_local_enabled()) {
tsize = sizeof(*hw_dom->arch_mbm_local);
hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
if (!hw_dom->arch_mbm_local) {
@@ -543,7 +471,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
return;
}
- hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
+ hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
if (hdr) {
if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
return;
@@ -598,7 +526,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
return;
}
- hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
+ hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
if (hdr) {
if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
return;
@@ -663,7 +591,7 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
return;
}
- hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
+ hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
if (!hdr) {
pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
id, cpu, r->name);
@@ -709,7 +637,7 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
return;
}
- hdr = rdt_find_domain(&r->mon_domains, id, NULL);
+ hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
if (!hdr) {
pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
id, cpu, r->name);
@@ -784,20 +712,6 @@ static int resctrl_arch_offline_cpu(unsigned int cpu)
return 0;
}
-/*
- * Choose a width for the resource name and resource data based on the
- * resource that has widest name and cbm.
- */
-static __init void rdt_init_padding(void)
-{
- struct rdt_resource *r;
-
- for_each_alloc_capable_rdt_resource(r) {
- if (r->data_width > max_data_width)
- max_data_width = r->data_width;
- }
-}
-
enum {
RDT_FLAG_CMT,
RDT_FLAG_MBM_TOTAL,
@@ -883,6 +797,21 @@ bool __init rdt_cpu_has(int flag)
return ret;
}
+__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
+{
+ if (!rdt_cpu_has(X86_FEATURE_BMEC))
+ return false;
+
+ switch (evt) {
+ case QOS_L3_MBM_TOTAL_EVENT_ID:
+ return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
+ case QOS_L3_MBM_LOCAL_EVENT_ID:
+ return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
+ default:
+ return false;
+ }
+}
+
static __init bool get_mem_config(void)
{
struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
@@ -1079,7 +1008,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
}
}
-static int __init resctrl_late_init(void)
+static int __init resctrl_arch_late_init(void)
{
struct rdt_resource *r;
int state, ret;
@@ -1095,8 +1024,6 @@ static int __init resctrl_late_init(void)
if (!get_rdt_resources())
return -ENODEV;
- rdt_init_padding();
-
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"x86/resctrl/cat:online:",
resctrl_arch_online_cpu,
@@ -1104,7 +1031,7 @@ static int __init resctrl_late_init(void)
if (state < 0)
return state;
- ret = rdtgroup_init();
+ ret = resctrl_init();
if (ret) {
cpuhp_remove_state(state);
return ret;
@@ -1120,18 +1047,13 @@ static int __init resctrl_late_init(void)
return 0;
}
-late_initcall(resctrl_late_init);
+late_initcall(resctrl_arch_late_init);
-static void __exit resctrl_exit(void)
+static void __exit resctrl_arch_exit(void)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
-
cpuhp_remove_state(rdt_online);
- rdtgroup_exit();
-
- if (r->mon_capable)
- rdt_put_mon_l3_config();
+ resctrl_exit();
}
-__exitcall(resctrl_exit);
+__exitcall(resctrl_arch_exit);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 200d89a64027..0a0ac5f6112e 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -23,6 +23,15 @@
#include "internal.h"
+struct rdt_parse_data {
+ struct rdtgroup *rdtgrp;
+ char *buf;
+};
+
+typedef int (ctrlval_parser_t)(struct rdt_parse_data *data,
+ struct resctrl_schema *s,
+ struct rdt_ctrl_domain *d);
+
/*
* Check whether MBA bandwidth percentage value is correct. The value is
* checked against the minimum and max bandwidth values specified by the
@@ -54,9 +63,9 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
return true;
}
- if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ if (bw < r->membw.min_bw || bw > r->membw.max_bw) {
rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
- bw, r->membw.min_bw, r->default_ctrl);
+ bw, r->membw.min_bw, r->membw.max_bw);
return false;
}
@@ -64,8 +73,8 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
return true;
}
-int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_ctrl_domain *d)
+static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
+ struct rdt_ctrl_domain *d)
{
struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid;
@@ -104,8 +113,9 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
*/
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
- unsigned long first_bit, zero_bit, val;
+ u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1;
unsigned int cbm_len = r->cache.cbm_len;
+ unsigned long first_bit, zero_bit, val;
int ret;
ret = kstrtoul(buf, 16, &val);
@@ -114,7 +124,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return false;
}
- if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) {
+ if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) {
rdt_last_cmd_puts("Mask out of range\n");
return false;
}
@@ -143,8 +153,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_ctrl_domain *d)
+static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
+ struct rdt_ctrl_domain *d)
{
struct rdtgroup *rdtgrp = data->rdtgrp;
struct resctrl_staged_config *cfg;
@@ -210,6 +220,7 @@ static int parse_line(char *line, struct resctrl_schema *s,
struct rdtgroup *rdtgrp)
{
enum resctrl_conf_type t = s->conf_type;
+ ctrlval_parser_t *parse_ctrlval = NULL;
struct resctrl_staged_config *cfg;
struct rdt_resource *r = s->res;
struct rdt_parse_data data;
@@ -220,6 +231,18 @@ static int parse_line(char *line, struct resctrl_schema *s,
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
+ switch (r->schema_fmt) {
+ case RESCTRL_SCHEMA_BITMAP:
+ parse_ctrlval = &parse_cbm;
+ break;
+ case RESCTRL_SCHEMA_RANGE:
+ parse_ctrlval = &parse_bw;
+ break;
+ }
+
+ if (WARN_ON_ONCE(!parse_ctrlval))
+ return -EINVAL;
+
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
(r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
@@ -240,7 +263,7 @@ next:
if (d->hdr.id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
- if (r->parse_ctrlval(&data, s, d))
+ if (parse_ctrlval(&data, s, d))
return -EINVAL;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
cfg = &d->staged_config[t];
@@ -264,25 +287,12 @@ next:
return -EINVAL;
}
-static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
-{
- switch (type) {
- default:
- case CDP_NONE:
- return closid;
- case CDP_CODE:
- return closid * 2 + 1;
- case CDP_DATA:
- return closid * 2;
- }
-}
-
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- u32 idx = get_config_index(closid, t);
+ u32 idx = resctrl_get_config_index(closid, t);
struct msr_param msr_param;
if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
@@ -319,7 +329,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
if (!cfg->have_new_ctrl)
continue;
- idx = get_config_index(closid, t);
+ idx = resctrl_get_config_index(closid, t);
if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
continue;
hw_dom->ctrl_val[idx] = cfg->new_ctrl;
@@ -439,7 +449,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type)
{
struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
- u32 idx = get_config_index(closid, type);
+ u32 idx = resctrl_get_config_index(closid, type);
return hw_dom->ctrl_val[idx];
}
@@ -465,8 +475,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
ctrl_val = resctrl_arch_get_config(r, dom, closid,
schema->conf_type);
- seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
- ctrl_val);
+ seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val);
sep = true;
}
seq_puts(s, "\n");
@@ -518,6 +527,98 @@ static int smp_mon_event_count(void *arg)
return 0;
}
+ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+ buf[nbytes - 1] = '\0';
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+ rdt_last_cmd_clear();
+
+ if (!strcmp(buf, "mbm_local_bytes")) {
+ if (resctrl_arch_is_mbm_local_enabled())
+ rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+ else
+ ret = -EINVAL;
+ } else if (!strcmp(buf, "mbm_total_bytes")) {
+ if (resctrl_arch_is_mbm_total_enabled())
+ rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+ else
+ ret = -EINVAL;
+ } else {
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);
+
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
+}
+
+int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+
+ if (rdtgrp) {
+ switch (rdtgrp->mba_mbps_event) {
+ case QOS_L3_MBM_LOCAL_EVENT_ID:
+ seq_puts(s, "mbm_local_bytes\n");
+ break;
+ case QOS_L3_MBM_TOTAL_EVENT_ID:
+ seq_puts(s, "mbm_total_bytes\n");
+ break;
+ default:
+ pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
+ ret = -EINVAL;
+ break;
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
+ struct list_head **pos)
+{
+ struct rdt_domain_hdr *d;
+ struct list_head *l;
+
+ list_for_each(l, h) {
+ d = list_entry(l, struct rdt_domain_hdr, list);
+ /* When id is found, return its domain. */
+ if (id == d->id)
+ return d;
+ /* Stop searching when finding id's position in sorted list. */
+ if (id < d->id)
+ break;
+ }
+
+ if (pos)
+ *pos = l;
+
+ return NULL;
+}
+
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
cpumask_t *cpumask, int evtid, int first)
@@ -579,7 +680,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
resid = md.u.rid;
domid = md.u.domid;
evtid = md.u.evtid;
- r = &rdt_resources_all[resid].r_resctrl;
+ r = resctrl_arch_get_resource(resid);
if (md.u.sum) {
/*
@@ -603,7 +704,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
* This file provides data from a single domain. Search
* the resource to find the domain with "domid".
*/
- hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
+ hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
ret = -ENOENT;
goto out;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 955999aecfca..c44c5b496355 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -32,30 +32,6 @@
*/
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
-/* Reads to Local DRAM Memory */
-#define READS_TO_LOCAL_MEM BIT(0)
-
-/* Reads to Remote DRAM Memory */
-#define READS_TO_REMOTE_MEM BIT(1)
-
-/* Non-Temporal Writes to Local Memory */
-#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2)
-
-/* Non-Temporal Writes to Remote Memory */
-#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3)
-
-/* Reads to Local Memory the system identifies as "Slow Memory" */
-#define READS_TO_LOCAL_S_MEM BIT(4)
-
-/* Reads to Remote Memory the system identifies as "Slow Memory" */
-#define READS_TO_REMOTE_S_MEM BIT(5)
-
-/* Dirty Victims to All Types of Memory */
-#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6)
-
-/* Max event bits supported */
-#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
-
/**
* cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
* aren't marked nohz_full
@@ -180,7 +156,6 @@ struct rmid_read {
void *arch_mon_ctx;
};
-extern unsigned int rdt_mon_features;
extern struct list_head resctrl_schema_all;
extern bool resctrl_mounted;
@@ -234,43 +209,6 @@ struct mongroup {
};
/**
- * struct pseudo_lock_region - pseudo-lock region information
- * @s: Resctrl schema for the resource to which this
- * pseudo-locked region belongs
- * @d: RDT domain to which this pseudo-locked region
- * belongs
- * @cbm: bitmask of the pseudo-locked region
- * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
- * completion
- * @thread_done: variable used by waitqueue to test if pseudo-locking
- * thread completed
- * @cpu: core associated with the cache on which the setup code
- * will be run
- * @line_size: size of the cache lines
- * @size: size of pseudo-locked region in bytes
- * @kmem: the kernel memory associated with pseudo-locked region
- * @minor: minor number of character device associated with this
- * region
- * @debugfs_dir: pointer to this region's directory in the debugfs
- * filesystem
- * @pm_reqs: Power management QoS requests related to this region
- */
-struct pseudo_lock_region {
- struct resctrl_schema *s;
- struct rdt_ctrl_domain *d;
- u32 cbm;
- wait_queue_head_t lock_thread_wq;
- int thread_done;
- int cpu;
- unsigned int line_size;
- unsigned int size;
- void *kmem;
- unsigned int minor;
- struct dentry *debugfs_dir;
- struct list_head pm_reqs;
-};
-
-/**
* struct rdtgroup - store rdtgroup's data in resctrl file system.
* @kn: kernfs node
* @rdtgroup_list: linked list for all rdtgroups
@@ -283,6 +221,7 @@ struct pseudo_lock_region {
* monitor only or ctrl_mon group
* @mon: mongroup related data
* @mode: mode of resource group
+ * @mba_mbps_event: input monitoring event id when mba_sc is enabled
* @plr: pseudo-locked region
*/
struct rdtgroup {
@@ -295,6 +234,7 @@ struct rdtgroup {
enum rdt_group_type type;
struct mongroup mon;
enum rdtgrp_mode mode;
+ enum resctrl_event_id mba_mbps_event;
struct pseudo_lock_region *plr;
};
@@ -324,10 +264,7 @@ struct rdtgroup {
/* List of all resource groups */
extern struct list_head rdt_all_groups;
-extern int max_name_width, max_data_width;
-
-int __init rdtgroup_init(void);
-void __exit rdtgroup_exit(void);
+extern int max_name_width;
/**
* struct rftype - describe each file in the resctrl file system
@@ -431,37 +368,6 @@ struct msr_param {
u32 high;
};
-static inline bool is_llc_occupancy_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
-}
-
-static inline bool is_mbm_total_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
-}
-
-static inline bool is_mbm_local_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
-}
-
-static inline bool is_mbm_enabled(void)
-{
- return (is_mbm_total_enabled() || is_mbm_local_enabled());
-}
-
-static inline bool is_mbm_event(int e)
-{
- return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
- e <= QOS_L3_MBM_LOCAL_EVENT_ID);
-}
-
-struct rdt_parse_data {
- struct rdtgroup *rdtgrp;
- char *buf;
-};
-
/**
* struct rdt_hw_resource - arch private attributes of a resctrl resource
* @r_resctrl: Attributes of the resource used directly by resctrl.
@@ -474,8 +380,6 @@ struct rdt_parse_data {
* @msr_update: Function pointer to update QOS MSRs
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
- * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth
- * Monitoring Event Configuration (BMEC) is supported.
* @cdp_enabled: CDP state of this resource
*
* Members of this structure are either private to the architecture
@@ -489,7 +393,6 @@ struct rdt_hw_resource {
void (*msr_update)(struct msr_param *m);
unsigned int mon_scale;
unsigned int mbm_width;
- unsigned int mbm_cfg_mask;
bool cdp_enabled;
};
@@ -498,34 +401,12 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r
return container_of(r, struct rdt_hw_resource, r_resctrl);
}
-int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_ctrl_domain *d);
-int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_ctrl_domain *d);
-
extern struct mutex rdtgroup_mutex;
extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
extern struct dentry *debugfs_resctrl;
-
-enum resctrl_res_level {
- RDT_RESOURCE_L3,
- RDT_RESOURCE_L2,
- RDT_RESOURCE_MBA,
- RDT_RESOURCE_SMBA,
-
- /* Must be the last */
- RDT_NUM_RESOURCES,
-};
-
-static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
-{
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
-
- hw_res++;
- return &hw_res->r_resctrl;
-}
+extern enum resctrl_event_id mba_mbps_default_event;
static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
{
@@ -536,27 +417,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);
-/*
- * To return the common struct rdt_resource, which is contained in struct
- * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
- */
-#define for_each_rdt_resource(r) \
- for (r = &rdt_resources_all[0].r_resctrl; \
- r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \
- r = resctrl_inc(r))
-
-#define for_each_capable_rdt_resource(r) \
- for_each_rdt_resource(r) \
- if (r->alloc_capable || r->mon_capable)
-
-#define for_each_alloc_capable_rdt_resource(r) \
- for_each_rdt_resource(r) \
- if (r->alloc_capable)
-
-#define for_each_mon_capable_rdt_resource(r) \
- for_each_rdt_resource(r) \
- if (r->mon_capable)
-
/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
union cpuid_0x10_1_eax {
struct {
@@ -601,40 +461,33 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
umode_t mask);
-struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
- struct list_head **pos);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
+ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
-int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
-int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
-int rdt_pseudo_lock_init(void);
-void rdt_pseudo_lock_release(void);
-int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
-void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
-struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(u32 closid);
void free_rmid(u32 closid, u32 rmid);
int rdt_get_mon_l3_config(struct rdt_resource *r);
-void __exit rdt_put_mon_l3_config(void);
+void resctrl_mon_resource_exit(void);
bool __init rdt_cpu_has(int flag);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
cpumask_t *cpumask, int evtid, int first);
+int __init resctrl_mon_resource_init(void);
void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
unsigned long delay_ms,
int exclude_cpu);
@@ -647,10 +500,49 @@ void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_mon_domain *d);
void __check_limbo(struct rdt_mon_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
-void __init thread_throttle_mode_init(void);
-void __init mbm_config_rftype_init(const char *config);
+void resctrl_file_fflags_init(const char *config, unsigned long fflags);
void rdt_staged_configs_clear(void);
bool closid_allocated(unsigned int closid);
int resctrl_find_cleanest_closid(void);
+#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
+int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
+int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
+int rdt_pseudo_lock_init(void);
+void rdt_pseudo_lock_release(void);
+int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
+void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
+#else
+static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
+{
+ return false;
+}
+
+static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
+{
+ return false;
+}
+
+static inline int rdt_pseudo_lock_init(void) { return 0; }
+static inline void rdt_pseudo_lock_release(void) { }
+static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { }
+#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */
+
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 5fcb3d635d91..a93ed7d2a160 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -295,11 +295,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *
{
struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
- if (is_mbm_total_enabled())
+ if (resctrl_arch_is_mbm_total_enabled())
memset(hw_dom->arch_mbm_total, 0,
sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
- if (is_mbm_local_enabled())
+ if (resctrl_arch_is_mbm_local_enabled())
memset(hw_dom->arch_mbm_local, 0,
sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
}
@@ -365,7 +365,7 @@ static void limbo_release_entry(struct rmid_entry *entry)
*/
void __check_limbo(struct rdt_mon_domain *d, bool force_free)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
u32 idx, cur_idx = 1;
@@ -521,7 +521,7 @@ int alloc_rmid(u32 closid)
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
struct rdt_mon_domain *d;
u32 idx;
@@ -569,7 +569,7 @@ void free_rmid(u32 closid, u32 rmid)
entry = __rmid_entry(idx);
- if (is_llc_occupancy_enabled())
+ if (resctrl_arch_is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
else
list_add_tail(&entry->list, &rmid_free_lru);
@@ -663,9 +663,12 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;
+ struct mbm_state *m;
+
+ m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
+ if (WARN_ON_ONCE(!m))
+ return;
cur_bytes = rr->val;
bytes = cur_bytes - m->prev_bw_bytes;
@@ -715,6 +718,22 @@ void mon_event_count(void *info)
rr->err = 0;
}
+static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
+ struct rdt_resource *r)
+{
+ struct rdt_ctrl_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
/*
* Feedback loop for MBA software controller (mba_sc)
*
@@ -752,20 +771,20 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
struct rdt_ctrl_domain *dom_mba;
+ enum resctrl_event_id evt_id;
struct rdt_resource *r_mba;
- u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
+ u32 cur_bw, user_bw;
- if (!is_mbm_local_enabled())
- return;
-
- r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+ evt_id = rgrp->mba_mbps_event;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- pmbm_data = &dom_mbm->mbm_local[idx];
+ pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
+ if (WARN_ON_ONCE(!pmbm_data))
+ return;
dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -784,7 +803,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
*/
head = &rgrp->mon.crdtgrp_list;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
+ if (WARN_ON_ONCE(!cmbm_data))
+ return;
cur_bw += cmbm_data->prev_bw;
}
@@ -813,54 +834,45 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}
-static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid)
+static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, enum resctrl_event_id evtid)
{
struct rmid_read rr = {0};
rr.r = r;
rr.d = d;
+ rr.evtid = evtid;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (IS_ERR(rr.arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(rr.arch_mon_ctx));
+ return;
+ }
+
+ __mon_event_count(closid, rmid, &rr);
/*
- * This is protected from concurrent reads from user
- * as both the user and we hold the global mutex.
+ * If the software controller is enabled, compute the
+ * bandwidth for this event id.
*/
- if (is_mbm_total_enabled()) {
- rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
- rr.val = 0;
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
- }
-
- __mon_event_count(closid, rmid, &rr);
+ if (is_mba_sc(NULL))
+ mbm_bw_count(closid, rmid, &rr);
- resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
- }
- if (is_mbm_local_enabled()) {
- rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
- rr.val = 0;
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
- }
-
- __mon_event_count(closid, rmid, &rr);
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
+}
- /*
- * Call the MBA software controller only for the
- * control groups and when user has enabled
- * the software controller explicitly.
- */
- if (is_mba_sc(NULL))
- mbm_bw_count(closid, rmid, &rr);
+static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid)
+{
+ /*
+ * This is protected from concurrent reads from user as both
+ * the user and overflow handler hold the global mutex.
+ */
+ if (resctrl_arch_is_mbm_total_enabled())
+ mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
- resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
- }
+ if (resctrl_arch_is_mbm_local_enabled())
+ mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
}
/*
@@ -929,7 +941,7 @@ void mbm_handle_overflow(struct work_struct *work)
if (!resctrl_mounted || !resctrl_arch_mon_capable())
goto out_unlock;
- r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
d = container_of(work, struct rdt_mon_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
@@ -1031,7 +1043,7 @@ static int dom_data_init(struct rdt_resource *r)
/*
* RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
* are always allocated. These are used for the rdtgroup_default
- * control group, which will be setup later in rdtgroup_init().
+ * control group, which will be setup later in resctrl_init().
*/
idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
RESCTRL_RESERVED_RMID);
@@ -1044,10 +1056,13 @@ out_unlock:
return err;
}
-static void __exit dom_data_exit(void)
+static void dom_data_exit(struct rdt_resource *r)
{
mutex_lock(&rdtgroup_mutex);
+ if (!r->mon_capable)
+ goto out_unlock;
+
if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
kfree(closid_num_dirty_rmid);
closid_num_dirty_rmid = NULL;
@@ -1056,6 +1071,7 @@ static void __exit dom_data_exit(void)
kfree(rmid_ptrs);
rmid_ptrs = NULL;
+out_unlock:
mutex_unlock(&rdtgroup_mutex);
}
@@ -1085,11 +1101,11 @@ static void l3_mon_evt_init(struct rdt_resource *r)
{
INIT_LIST_HEAD(&r->evt_list);
- if (is_llc_occupancy_enabled())
+ if (resctrl_arch_is_llc_occupancy_enabled())
list_add_tail(&llc_occupancy_event.list, &r->evt_list);
- if (is_mbm_total_enabled())
+ if (resctrl_arch_is_mbm_total_enabled())
list_add_tail(&mbm_total_event.list, &r->evt_list);
- if (is_mbm_local_enabled())
+ if (resctrl_arch_is_mbm_local_enabled())
list_add_tail(&mbm_local_event.list, &r->evt_list);
}
@@ -1176,12 +1192,56 @@ static __init int snc_get_config(void)
return ret;
}
+/**
+ * resctrl_mon_resource_init() - Initialise global monitoring structures.
+ *
+ * Allocate and initialise global monitor resources that do not belong to a
+ * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists.
+ * Called once during boot after the struct rdt_resource's have been configured
+ * but before the filesystem is mounted.
+ * Resctrl's cpuhp callbacks may be called before this point to bring a domain
+ * online.
+ *
+ * Returns 0 for success, or -ENOMEM.
+ */
+int __init resctrl_mon_resource_init(void)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ int ret;
+
+ if (!r->mon_capable)
+ return 0;
+
+ ret = dom_data_init(r);
+ if (ret)
+ return ret;
+
+ l3_mon_evt_init(r);
+
+ if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
+ mbm_total_event.configurable = true;
+ resctrl_file_fflags_init("mbm_total_bytes_config",
+ RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
+ }
+ if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
+ mbm_local_event.configurable = true;
+ resctrl_file_fflags_init("mbm_local_bytes_config",
+ RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
+ }
+
+ if (resctrl_arch_is_mbm_local_enabled())
+ mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+ else if (resctrl_arch_is_mbm_total_enabled())
+ mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+
+ return 0;
+}
+
int __init rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
unsigned int threshold;
- int ret;
snc_nodes_per_l3_cache = snc_get_config();
@@ -1211,37 +1271,24 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
*/
resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
- ret = dom_data_init(r);
- if (ret)
- return ret;
-
if (rdt_cpu_has(X86_FEATURE_BMEC)) {
u32 eax, ebx, ecx, edx;
/* Detect list of bandwidth sources that can be tracked */
cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
- hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
-
- if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
- mbm_total_event.configurable = true;
- mbm_config_rftype_init("mbm_total_bytes_config");
- }
- if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
- mbm_local_event.configurable = true;
- mbm_config_rftype_init("mbm_local_bytes_config");
- }
+ r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
}
- l3_mon_evt_init(r);
-
r->mon_capable = true;
return 0;
}
-void __exit rdt_put_mon_l3_config(void)
+void resctrl_mon_resource_exit(void)
{
- dom_data_exit();
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+
+ dom_data_exit(r);
}
void __init intel_rdt_mbm_apply_quirk(void)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 972e6b6b0481..01fa7890b43f 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -61,7 +61,8 @@ static const struct class pseudo_lock_class = {
};
/**
- * get_prefetch_disable_bits - prefetch disable bits of supported platforms
+ * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported
+ * platforms
* @void: It takes no parameters.
*
* Capture the list of platforms that have been validated to support
@@ -75,14 +76,16 @@ static const struct class pseudo_lock_class = {
* in the SDM.
*
* When adding a platform here also add support for its cache events to
- * measure_cycles_perf_fn()
+ * resctrl_arch_measure_l*_residency()
*
* Return:
* If platform is supported, the bits to disable hardware prefetchers, 0
* if platform is not supported.
*/
-static u64 get_prefetch_disable_bits(void)
+u64 resctrl_arch_get_prefetch_disable_bits(void)
{
+ prefetch_disable_bits = 0;
+
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
return 0;
@@ -98,7 +101,8 @@ static u64 get_prefetch_disable_bits(void)
* 3 DCU IP Prefetcher Disable (R/W)
* 63:4 Reserved
*/
- return 0xF;
+ prefetch_disable_bits = 0xF;
+ break;
case INTEL_ATOM_GOLDMONT:
case INTEL_ATOM_GOLDMONT_PLUS:
/*
@@ -109,10 +113,11 @@ static u64 get_prefetch_disable_bits(void)
* 2 DCU Hardware Prefetcher Disable (R/W)
* 63:3 Reserved
*/
- return 0x5;
+ prefetch_disable_bits = 0x5;
+ break;
}
- return 0;
+ return prefetch_disable_bits;
}
/**
@@ -408,8 +413,8 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp)
}
/**
- * pseudo_lock_fn - Load kernel memory into cache
- * @_rdtgrp: resource group to which pseudo-lock region belongs
+ * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache
+ * @_plr: the pseudo-lock region descriptor
*
* This is the core pseudo-locking flow.
*
@@ -426,10 +431,9 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp)
*
* Return: 0. Waiter on waitqueue will be woken on completion.
*/
-static int pseudo_lock_fn(void *_rdtgrp)
+int resctrl_arch_pseudo_lock_fn(void *_plr)
{
- struct rdtgroup *rdtgrp = _rdtgrp;
- struct pseudo_lock_region *plr = rdtgrp->plr;
+ struct pseudo_lock_region *plr = _plr;
u32 rmid_p, closid_p;
unsigned long i;
u64 saved_msr;
@@ -459,7 +463,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
* increase likelihood that allocated cache portion will be filled
* with associated memory.
*/
- native_wbinvd();
+ wbinvd();
/*
* Always called with interrupts enabled. By disabling interrupts
@@ -489,7 +493,8 @@ static int pseudo_lock_fn(void *_rdtgrp)
* pseudo-locked followed by reading of kernel memory to load it
* into the cache.
*/
- __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
+ __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid);
+
/*
* Cache was flushed earlier. Now access kernel memory to read it
* into cache region associated with just activated plr->closid.
@@ -712,8 +717,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
* Not knowing the bits to disable prefetching implies that this
* platform does not support Cache Pseudo-Locking.
*/
- prefetch_disable_bits = get_prefetch_disable_bits();
- if (prefetch_disable_bits == 0) {
+ if (resctrl_arch_get_prefetch_disable_bits() == 0) {
rdt_last_cmd_puts("Pseudo-locking not supported\n");
return -EINVAL;
}
@@ -872,7 +876,8 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
}
/**
- * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
+ * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read
+ * pseudo-locked memory
* @_plr: pseudo-lock region to measure
*
* There is no deterministic way to test if a memory region is cached. One
@@ -885,7 +890,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
*
* Return: 0. Waiter on waitqueue will be woken on completion.
*/
-static int measure_cycles_lat_fn(void *_plr)
+int resctrl_arch_measure_cycles_lat_fn(void *_plr)
{
struct pseudo_lock_region *plr = _plr;
u32 saved_low, saved_high;
@@ -1069,7 +1074,7 @@ out:
return 0;
}
-static int measure_l2_residency(void *_plr)
+int resctrl_arch_measure_l2_residency(void *_plr)
{
struct pseudo_lock_region *plr = _plr;
struct residency_counts counts = {0};
@@ -1107,7 +1112,7 @@ out:
return 0;
}
-static int measure_l3_residency(void *_plr)
+int resctrl_arch_measure_l3_residency(void *_plr)
{
struct pseudo_lock_region *plr = _plr;
struct residency_counts counts = {0};
@@ -1205,20 +1210,14 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
plr->cpu = cpu;
if (sel == 1)
- thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
- cpu_to_node(cpu),
- "pseudo_lock_measure/%u",
- cpu);
+ thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn,
+ plr, cpu, "pseudo_lock_measure/%u");
else if (sel == 2)
- thread = kthread_create_on_node(measure_l2_residency, plr,
- cpu_to_node(cpu),
- "pseudo_lock_measure/%u",
- cpu);
+ thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency,
+ plr, cpu, "pseudo_lock_measure/%u");
else if (sel == 3)
- thread = kthread_create_on_node(measure_l3_residency, plr,
- cpu_to_node(cpu),
- "pseudo_lock_measure/%u",
- cpu);
+ thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency,
+ plr, cpu, "pseudo_lock_measure/%u");
else
goto out;
@@ -1226,8 +1225,6 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
ret = PTR_ERR(thread);
goto out;
}
- kthread_bind(thread, cpu);
- wake_up_process(thread);
ret = wait_event_interruptible(plr->lock_thread_wq,
plr->thread_done == 1);
@@ -1315,18 +1312,14 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
plr->thread_done = 0;
- thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
- cpu_to_node(plr->cpu),
- "pseudo_lock/%u", plr->cpu);
+ thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr,
+ plr->cpu, "pseudo_lock/%u");
if (IS_ERR(thread)) {
ret = PTR_ERR(thread);
rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
goto out_cstates;
}
- kthread_bind(thread, plr->cpu);
- wake_up_process(thread);
-
ret = wait_event_interruptible(plr->lock_thread_wq,
plr->thread_done == 1);
if (ret < 0) {
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index d906a1cd8491..c6274d40b217 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -57,6 +57,12 @@ static struct kernfs_node *kn_mongrp;
/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;
+/*
+ * Used to store the max resource name width to display the schemata names in
+ * a tabular format.
+ */
+int max_name_width;
+
static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];
@@ -65,6 +71,15 @@ static void rdtgroup_destroy_root(void);
struct dentry *debugfs_resctrl;
+/*
+ * Memory bandwidth monitoring event to use for the default CTRL_MON group
+ * and each new CTRL_MON group created by the user. Only relevant when
+ * the filesystem is mounted with the "mba_MBps" option so it does not
+ * matter that it remains uninitialized on systems that do not support
+ * the "mba_MBps" option.
+ */
+enum resctrl_event_id mba_mbps_default_event;
+
static bool resctrl_debug;
void rdt_last_cmd_clear(void)
@@ -102,6 +117,18 @@ void rdt_staged_configs_clear(void)
}
}
+static bool resctrl_is_mbm_enabled(void)
+{
+ return (resctrl_arch_is_mbm_total_enabled() ||
+ resctrl_arch_is_mbm_local_enabled());
+}
+
+static bool resctrl_is_mbm_event(int e)
+{
+ return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+ e <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
/*
* Trivial allocator for CLOSIDs. Since h/w only supports a small number,
* we can keep a bitmap of free CLOSIDs in a single integer.
@@ -148,7 +175,8 @@ static int closid_alloc(void)
lockdep_assert_held(&rdtgroup_mutex);
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
+ resctrl_arch_is_llc_occupancy_enabled()) {
cleanest_closid = resctrl_find_cleanest_closid();
if (cleanest_closid < 0)
return cleanest_closid;
@@ -339,13 +367,13 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
* from update_closid_rmid() is protected against __switch_to() because
* preemption is disabled.
*/
-static void update_cpu_closid_rmid(void *info)
+void resctrl_arch_sync_cpu_closid_rmid(void *info)
{
- struct rdtgroup *r = info;
+ struct resctrl_cpu_defaults *r = info;
if (r) {
this_cpu_write(pqr_state.default_closid, r->closid);
- this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
+ this_cpu_write(pqr_state.default_rmid, r->rmid);
}
/*
@@ -360,11 +388,20 @@ static void update_cpu_closid_rmid(void *info)
* Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
*
* Per task closids/rmids must have been set up before calling this function.
+ * @r may be NULL.
*/
static void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
- on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
+ struct resctrl_cpu_defaults defaults, *p = NULL;
+
+ if (r) {
+ defaults.closid = r->closid;
+ defaults.rmid = r->mon.rmid;
+ p = &defaults;
+ }
+
+ on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
}
static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
@@ -962,7 +999,7 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct resctrl_schema *s = of->kn->parent->priv;
struct rdt_resource *r = s->res;
- seq_printf(seq, "%x\n", r->default_ctrl);
+ seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
return 0;
}
@@ -1151,10 +1188,19 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
struct resctrl_schema *s = of->kn->parent->priv;
struct rdt_resource *r = s->res;
- if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
+ switch (r->membw.throttle_mode) {
+ case THREAD_THROTTLE_PER_THREAD:
seq_puts(seq, "per-thread\n");
- else
+ return 0;
+ case THREAD_THROTTLE_MAX:
seq_puts(seq, "max\n");
+ return 0;
+ case THREAD_THROTTLE_UNDEFINED:
+ seq_puts(seq, "undefined\n");
+ return 0;
+ }
+
+ WARN_ON_ONCE(1);
return 0;
}
@@ -1416,7 +1462,8 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
goto out;
}
rdtgrp->mode = RDT_MODE_EXCLUSIVE;
- } else if (!strcmp(buf, "pseudo-locksetup")) {
+ } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
+ !strcmp(buf, "pseudo-locksetup")) {
ret = rdtgroup_locksetup_enter(rdtgrp);
if (ret)
goto out;
@@ -1543,11 +1590,6 @@ out:
return ret;
}
-struct mon_config_info {
- u32 evtid;
- u32 mon_config;
-};
-
#define INVALID_CONFIG_INDEX UINT_MAX
/**
@@ -1572,31 +1614,32 @@ static inline unsigned int mon_event_config_index_get(u32 evtid)
}
}
-static void mon_event_config_read(void *info)
+void resctrl_arch_mon_event_config_read(void *_config_info)
{
- struct mon_config_info *mon_info = info;
+ struct resctrl_mon_config_info *config_info = _config_info;
unsigned int index;
u64 msrval;
- index = mon_event_config_index_get(mon_info->evtid);
+ index = mon_event_config_index_get(config_info->evtid);
if (index == INVALID_CONFIG_INDEX) {
- pr_warn_once("Invalid event id %d\n", mon_info->evtid);
+ pr_warn_once("Invalid event id %d\n", config_info->evtid);
return;
}
rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
/* Report only the valid event configuration bits */
- mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
+ config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}
-static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
+static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
{
- smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
+ smp_call_function_any(&mon_info->d->hdr.cpu_mask,
+ resctrl_arch_mon_event_config_read, mon_info, 1);
}
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
- struct mon_config_info mon_info;
+ struct resctrl_mon_config_info mon_info;
struct rdt_mon_domain *dom;
bool sep = false;
@@ -1607,9 +1650,11 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
if (sep)
seq_puts(s, ";");
- memset(&mon_info, 0, sizeof(struct mon_config_info));
+ memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
+ mon_info.r = r;
+ mon_info.d = dom;
mon_info.evtid = evtid;
- mondata_config_read(dom, &mon_info);
+ mondata_config_read(&mon_info);
seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
sep = true;
@@ -1642,30 +1687,32 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
return 0;
}
-static void mon_event_config_write(void *info)
+void resctrl_arch_mon_event_config_write(void *_config_info)
{
- struct mon_config_info *mon_info = info;
+ struct resctrl_mon_config_info *config_info = _config_info;
unsigned int index;
- index = mon_event_config_index_get(mon_info->evtid);
+ index = mon_event_config_index_get(config_info->evtid);
if (index == INVALID_CONFIG_INDEX) {
- pr_warn_once("Invalid event id %d\n", mon_info->evtid);
+ pr_warn_once("Invalid event id %d\n", config_info->evtid);
return;
}
- wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
+ wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0);
}
static void mbm_config_write_domain(struct rdt_resource *r,
struct rdt_mon_domain *d, u32 evtid, u32 val)
{
- struct mon_config_info mon_info = {0};
+ struct resctrl_mon_config_info mon_info = {0};
/*
* Read the current config value first. If both are the same then
* no need to write it again.
*/
+ mon_info.r = r;
+ mon_info.d = d;
mon_info.evtid = evtid;
- mondata_config_read(d, &mon_info);
+ mondata_config_read(&mon_info);
if (mon_info.mon_config == val)
return;
@@ -1677,7 +1724,7 @@ static void mbm_config_write_domain(struct rdt_resource *r,
* are scoped at the domain level. Writing any of these MSRs
* on one CPU is observed by all the CPUs in the domain.
*/
- smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
+ smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
&mon_info, 1);
/*
@@ -1694,7 +1741,6 @@ static void mbm_config_write_domain(struct rdt_resource *r,
static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
{
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
struct rdt_mon_domain *d;
@@ -1721,9 +1767,9 @@ next:
}
/* Value from user cannot be more than the supported set of events */
- if ((val & hw_res->mbm_cfg_mask) != val) {
+ if ((val & r->mbm_cfg_mask) != val) {
rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
- hw_res->mbm_cfg_mask);
+ r->mbm_cfg_mask);
return -EINVAL;
}
@@ -1942,6 +1988,13 @@ static struct rftype res_common_files[] = {
.fflags = RFTYPE_CTRL_BASE,
},
{
+ .name = "mba_MBps_event",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_mba_mbps_event_write,
+ .seq_show = rdtgroup_mba_mbps_event_show,
+ },
+ {
.name = "mode",
.mode = 0644,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -2020,24 +2073,35 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
return NULL;
}
-void __init thread_throttle_mode_init(void)
+static void thread_throttle_mode_init(void)
{
- struct rftype *rft;
+ enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
+ struct rdt_resource *r_mba, *r_smba;
- rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
- if (!rft)
+ r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+ if (r_mba->alloc_capable &&
+ r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
+ throttle_mode = r_mba->membw.throttle_mode;
+
+ r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
+ if (r_smba->alloc_capable &&
+ r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
+ throttle_mode = r_smba->membw.throttle_mode;
+
+ if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
return;
- rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB;
+ resctrl_file_fflags_init("thread_throttle_mode",
+ RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
}
-void __init mbm_config_rftype_init(const char *config)
+void resctrl_file_fflags_init(const char *config, unsigned long fflags)
{
struct rftype *rft;
rft = rdtgroup_get_rftype_by_name(config);
if (rft)
- rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE;
+ rft->fflags = fflags;
}
/**
@@ -2159,6 +2223,20 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
return ret;
}
+static unsigned long fflags_from_resource(struct rdt_resource *r)
+{
+ switch (r->rid) {
+ case RDT_RESOURCE_L3:
+ case RDT_RESOURCE_L2:
+ return RFTYPE_RES_CACHE;
+ case RDT_RESOURCE_MBA:
+ case RDT_RESOURCE_SMBA:
+ return RFTYPE_RES_MB;
+ }
+
+ return WARN_ON_ONCE(1);
+}
+
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
struct resctrl_schema *s;
@@ -2179,14 +2257,14 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
/* loop over enabled controls, these are all alloc_capable */
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
- fflags = r->fflags | RFTYPE_CTRL_INFO;
+ fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
if (ret)
goto out_destroy;
}
for_each_mon_capable_rdt_resource(r) {
- fflags = r->fflags | RFTYPE_MON_INFO;
+ fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
sprintf(name, "%s_MON", r->name);
ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
if (ret)
@@ -2250,7 +2328,7 @@ static void l2_qos_cfg_update(void *arg)
static inline bool is_mba_linear(void)
{
- return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
+ return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
}
static int set_cache_qos_cfg(int level, bool enable)
@@ -2340,10 +2418,10 @@ static void mba_sc_domain_destroy(struct rdt_resource *r,
*/
static bool supports_mba_mbps(void)
{
- struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
- return (is_mbm_local_enabled() &&
+ return (resctrl_is_mbm_enabled() &&
r->alloc_capable && is_mba_linear() &&
r->ctrl_scope == rmbm->mon_scope);
}
@@ -2354,9 +2432,10 @@ static bool supports_mba_mbps(void)
*/
static int set_mba_sc(bool mba_sc)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
u32 num_closid = resctrl_arch_get_num_closid(r);
struct rdt_ctrl_domain *d;
+ unsigned long fflags;
int i;
if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
@@ -2364,11 +2443,16 @@ static int set_mba_sc(bool mba_sc)
r->membw.mba_sc = mba_sc;
+ rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
+
list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
+ fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
+ resctrl_file_fflags_init("mba_MBps_event", fflags);
+
return 0;
}
@@ -2585,6 +2669,20 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type
if (cl > max_name_width)
max_name_width = cl;
+ switch (r->schema_fmt) {
+ case RESCTRL_SCHEMA_BITMAP:
+ s->fmt_str = "%d=%x";
+ break;
+ case RESCTRL_SCHEMA_RANGE:
+ s->fmt_str = "%d=%u";
+ break;
+ }
+
+ if (WARN_ON_ONCE(!s->fmt_str)) {
+ kfree(s);
+ return -EINVAL;
+ }
+
INIT_LIST_HEAD(&s->list);
list_add(&s->list, &resctrl_schema_all);
@@ -2701,8 +2799,8 @@ static int rdt_get_tree(struct fs_context *fc)
if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
resctrl_mounted = true;
- if (is_mbm_enabled()) {
- r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ if (resctrl_is_mbm_enabled()) {
+ r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
list_for_each_entry(dom, &r->mon_domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
@@ -2768,7 +2866,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mbps:
- msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope";
+ msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
if (!supports_mba_mbps())
return invalfc(fc, msg);
ctx->enable_mba_mbps = true;
@@ -2812,7 +2910,7 @@ static int rdt_init_fs_context(struct fs_context *fc)
return 0;
}
-static int reset_all_ctrls(struct rdt_resource *r)
+void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_ctrl_domain *hw_dom;
@@ -2836,12 +2934,12 @@ static int reset_all_ctrls(struct rdt_resource *r)
hw_dom = resctrl_to_arch_ctrl_dom(d);
for (i = 0; i < hw_res->num_closid; i++)
- hw_dom->ctrl_val[i] = r->default_ctrl;
+ hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r);
msr_param.dom = d;
smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
- return 0;
+ return;
}
/*
@@ -2960,9 +3058,10 @@ static void rdt_kill_sb(struct super_block *sb)
rdt_disable_ctx();
- /*Put everything back to default values. */
+ /* Put everything back to default values. */
for_each_alloc_capable_rdt_resource(r)
- reset_all_ctrls(r);
+ resctrl_arch_reset_all_ctrls(r);
+
rmdir_all_sub();
rdt_pseudo_lock_release();
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
@@ -3069,7 +3168,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
if (ret)
return ret;
- if (!do_sum && is_mbm_event(mevt->evtid))
+ if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
}
@@ -3371,7 +3470,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
}
cfg = &d->staged_config[CDP_NONE];
- cfg->new_ctrl = r->default_ctrl;
+ cfg->new_ctrl = resctrl_get_default_ctrl(r);
cfg->have_new_ctrl = true;
}
}
@@ -3622,6 +3721,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
}
+ if (is_mba_sc(NULL))
+ rdtgrp->mba_mbps_event = mba_mbps_default_event;
}
goto out_unlock;
@@ -3683,14 +3784,21 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+ u32 closid, rmid;
int cpu;
/* Give any tasks back to the parent group */
rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
- /* Update per cpu rmid of the moved CPUs first */
+ /*
+ * Update per cpu closid/rmid of the moved CPUs first.
+ * Note: the closid will not change, but the arch code still needs it.
+ */
+ closid = prdtgrp->closid;
+ rmid = prdtgrp->mon.rmid;
for_each_cpu(cpu, &rdtgrp->cpu_mask)
- per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
+ resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
+
/*
* Update the MSR on moved CPUs and CPUs which have moved
* task running on them.
@@ -3723,6 +3831,7 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
+ u32 closid, rmid;
int cpu;
/* Give any tasks back to the default group */
@@ -3733,10 +3842,10 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
/* Update per cpu closid and rmid of the moved CPUs first */
- for_each_cpu(cpu, &rdtgrp->cpu_mask) {
- per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
- per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
- }
+ closid = rdtgroup_default.closid;
+ rmid = rdtgroup_default.mon.rmid;
+ for_each_cpu(cpu, &rdtgrp->cpu_mask)
+ resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
/*
* Update the MSR on moved CPUs and CPUs which have moved
@@ -3937,7 +4046,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
seq_puts(seq, ",cdpl2");
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
+ if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
seq_puts(seq, ",mba_MBps");
if (resctrl_debug)
@@ -4016,9 +4125,9 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
if (resctrl_mounted && resctrl_arch_mon_capable())
rmdir_mondata_subdir_allrdtgrp(r, d);
- if (is_mbm_enabled())
+ if (resctrl_is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
- if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
+ if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
/*
* When a package is going down, forcefully
* decrement rmid->ebusy. There is no way to know
@@ -4036,17 +4145,30 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
mutex_unlock(&rdtgroup_mutex);
}
+/**
+ * domain_setup_mon_state() - Initialise domain monitoring structures.
+ * @r: The resource for the newly online domain.
+ * @d: The newly online domain.
+ *
+ * Allocate monitor resources that belong to this domain.
+ * Called when the first CPU of a domain comes online, regardless of whether
+ * the filesystem is mounted.
+ * During boot this may be called before global allocations have been made by
+ * resctrl_mon_resource_init().
+ *
+ * Returns 0 for success, or -ENOMEM.
+ */
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
- if (is_llc_occupancy_enabled()) {
+ if (resctrl_arch_is_llc_occupancy_enabled()) {
d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
- if (is_mbm_total_enabled()) {
+ if (resctrl_arch_is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
@@ -4054,7 +4176,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain
return -ENOMEM;
}
}
- if (is_mbm_local_enabled()) {
+ if (resctrl_arch_is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
@@ -4093,13 +4215,13 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
if (err)
goto out_unlock;
- if (is_mbm_enabled()) {
+ if (resctrl_is_mbm_enabled()) {
INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
- if (is_llc_occupancy_enabled())
+ if (resctrl_arch_is_llc_occupancy_enabled())
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
/*
@@ -4135,9 +4257,25 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
}
}
+static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu,
+ struct rdt_resource *r)
+{
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
void resctrl_offline_cpu(unsigned int cpu)
{
- struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
@@ -4154,12 +4292,12 @@ void resctrl_offline_cpu(unsigned int cpu)
d = get_mon_domain_from_cpu(cpu, l3);
if (d) {
- if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
mbm_setup_overflow_handler(d, 0, cpu);
}
- if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
- has_busy_rmid(d)) {
+ if (resctrl_arch_is_llc_occupancy_enabled() &&
+ cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
cancel_delayed_work(&d->cqm_limbo);
cqm_setup_limbo_handler(d, 0, cpu);
}
@@ -4170,14 +4308,14 @@ out_unlock:
}
/*
- * rdtgroup_init - rdtgroup initialization
+ * resctrl_init - resctrl filesystem initialization
*
* Setup resctrl file system including set up root, create mount point,
- * register rdtgroup filesystem, and initialize files under root directory.
+ * register resctrl filesystem, and initialize files under root directory.
*
* Return: 0 on success or -errno
*/
-int __init rdtgroup_init(void)
+int __init resctrl_init(void)
{
int ret = 0;
@@ -4186,10 +4324,18 @@ int __init rdtgroup_init(void)
rdtgroup_setup_default();
- ret = sysfs_create_mount_point(fs_kobj, "resctrl");
+ thread_throttle_mode_init();
+
+ ret = resctrl_mon_resource_init();
if (ret)
return ret;
+ ret = sysfs_create_mount_point(fs_kobj, "resctrl");
+ if (ret) {
+ resctrl_mon_resource_exit();
+ return ret;
+ }
+
ret = register_filesystem(&rdt_fs_type);
if (ret)
goto cleanup_mountpoint;
@@ -4221,13 +4367,16 @@ int __init rdtgroup_init(void)
cleanup_mountpoint:
sysfs_remove_mount_point(fs_kobj, "resctrl");
+ resctrl_mon_resource_exit();
return ret;
}
-void __exit rdtgroup_exit(void)
+void __exit resctrl_exit(void)
{
debugfs_remove_recursive(debugfs_resctrl);
unregister_filesystem(&rdt_fs_type);
sysfs_remove_mount_point(fs_kobj, "resctrl");
+
+ resctrl_mon_resource_exit();
}