diff options
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl')
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/Makefile | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/core.c | 181 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 93 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/internal.h | 206 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c | 119 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 69 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 403 |
7 files changed, 570 insertions, 506 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 4a06c37b9cf1..0c13b0befd8a 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o +obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 3d1735ed8d1f..cf29681d01e0 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -44,12 +44,6 @@ static DEFINE_MUTEX(domain_list_lock); DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); /* - * Used to store the max resource name width and max resource data width - * to display the schemata in a tabular format - */ -int max_name_width, max_data_width; - -/* * Global boolean for rdt_alloc which is true if any * resource allocation is enabled. */ @@ -62,7 +56,7 @@ static void mba_wrmsr_amd(struct msr_param *m); #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) #define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) -struct rdt_hw_resource rdt_resources_all[] = { +struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { [RDT_RESOURCE_L3] = { .r_resctrl = { @@ -72,9 +66,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .mon_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), .mon_domains = mon_domain_init(RDT_RESOURCE_L3), - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, + .schema_fmt = RESCTRL_SCHEMA_BITMAP, }, .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, @@ -86,9 +78,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "L2", .ctrl_scope = RESCTRL_L2_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), - .parse_ctrlval = parse_cbm, - .format_str = "%d=%0*x", - .fflags = RFTYPE_RES_CACHE, + .schema_fmt = RESCTRL_SCHEMA_BITMAP, }, .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, @@ -100,9 +90,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "MB", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), - .parse_ctrlval = parse_bw, - .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, + .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, [RDT_RESOURCE_SMBA] = @@ -112,9 +100,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .name = "SMBA", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), - .parse_ctrlval = parse_bw, - .format_str = "%d=%*u", - .fflags = RFTYPE_RES_MB, + .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, }; @@ -127,6 +113,14 @@ u32 resctrl_arch_system_num_rmid_idx(void) return r->num_rmid; } +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) +{ + if (l >= RDT_NUM_RESOURCES) + return NULL; + + return &rdt_resources_all[l].r_resctrl; +} + /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. @@ -161,7 +155,6 @@ static inline void cache_alloc_hsw_probe(void) return; hw_res->num_closid = 4; - r->default_ctrl = max_cbm; r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; r->cache.min_cbm_bits = 2; @@ -174,7 +167,7 @@ static inline void cache_alloc_hsw_probe(void) bool is_mba_sc(struct rdt_resource *r) { if (!r) - return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; + r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); /* * The software controller support is only applicable to MBA resource. @@ -217,7 +210,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; - r->default_ctrl = MAX_MBA_BW; + r->membw.max_bw = MAX_MBA_BW; r->membw.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; @@ -228,16 +221,12 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) return false; r->membw.arch_needs_linear = false; } - r->data_width = 3; if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; else r->membw.throttle_mode = THREAD_THROTTLE_MAX; - resctrl_file_fflags_init("thread_throttle_mode", - RFTYPE_CTRL_INFO | RFTYPE_RES_MB); - r->alloc_capable = true; return true; @@ -256,7 +245,7 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); hw_res->num_closid = edx + 1; - r->default_ctrl = 1 << eax; + r->membw.max_bw = 1 << eax; /* AMD does not use delay */ r->membw.delay_linear = false; @@ -269,8 +258,6 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = 0; r->membw.bw_gran = 1; - /* Max value is 2048, Data width should be 4 in decimal */ - r->data_width = 4; r->alloc_capable = true; @@ -283,14 +270,13 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) union cpuid_0x10_1_eax eax; union cpuid_0x10_x_ecx ecx; union cpuid_0x10_x_edx edx; - u32 ebx; + u32 ebx, default_ctrl; cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; - r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; - r->cache.shareable_bits = ebx & r->default_ctrl; - r->data_width = (r->cache.cbm_len + 3) / 4; + default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; + r->cache.shareable_bits = ebx & default_ctrl; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; r->alloc_capable = true; @@ -337,7 +323,7 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) return MAX_MBA_BW - bw; pr_warn_once("Non Linear delay-bw map not supported but queried\n"); - return r->default_ctrl; + return MAX_MBA_BW; } static void mba_wrmsr_intel(struct msr_param *m) @@ -361,36 +347,6 @@ static void cat_wrmsr(struct msr_param *m) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } -struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_ctrl_domain *d; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->ctrl_domains, hdr.list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) - return d; - } - - return NULL; -} - -struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) -{ - struct rdt_mon_domain *d; - - lockdep_assert_cpus_held(); - - list_for_each_entry(d, &r->mon_domains, hdr.list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) - return d; - } - - return NULL; -} - u32 resctrl_arch_get_num_closid(struct rdt_resource *r) { return resctrl_to_arch_res(r)->num_closid; @@ -405,36 +361,6 @@ void rdt_ctrl_update(void *arg) hw_res->msr_update(m); } -/* - * rdt_find_domain - Search for a domain id in a resource domain list. - * - * Search the domain list to find the domain id. If the domain id is - * found, return the domain. NULL otherwise. If the domain id is not - * found (and NULL returned) then the first domain with id bigger than - * the input id can be returned to the caller via @pos. - */ -struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, - struct list_head **pos) -{ - struct rdt_domain_hdr *d; - struct list_head *l; - - list_for_each(l, h) { - d = list_entry(l, struct rdt_domain_hdr, list); - /* When id is found, return its domain. */ - if (id == d->id) - return d; - /* Stop searching when finding id's position in sorted list. */ - if (id < d->id) - break; - } - - if (pos) - *pos = l; - - return NULL; -} - static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -446,7 +372,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) * For Memory Allocation: Set b/w requested to 100% */ for (i = 0; i < hw_res->num_closid; i++, dc++) - *dc = r->default_ctrl; + *dc = resctrl_get_default_ctrl(r); } static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) @@ -494,13 +420,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; - if (is_mbm_total_enabled()) { + if (resctrl_arch_is_mbm_total_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_total); hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_total) return -ENOMEM; } - if (is_mbm_local_enabled()) { + if (resctrl_arch_is_mbm_local_enabled()) { tsize = sizeof(*hw_dom->arch_mbm_local); hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_local) { @@ -545,7 +471,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); + hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos); if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) return; @@ -600,7 +526,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); + hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos); if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) return; @@ -665,7 +591,7 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); + hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL); if (!hdr) { pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n", id, cpu, r->name); @@ -711,7 +637,7 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) return; } - hdr = rdt_find_domain(&r->mon_domains, id, NULL); + hdr = resctrl_find_domain(&r->mon_domains, id, NULL); if (!hdr) { pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", id, cpu, r->name); @@ -786,20 +712,6 @@ static int resctrl_arch_offline_cpu(unsigned int cpu) return 0; } -/* - * Choose a width for the resource name and resource data based on the - * resource that has widest name and cbm. - */ -static __init void rdt_init_padding(void) -{ - struct rdt_resource *r; - - for_each_alloc_capable_rdt_resource(r) { - if (r->data_width > max_data_width) - max_data_width = r->data_width; - } -} - enum { RDT_FLAG_CMT, RDT_FLAG_MBM_TOTAL, @@ -885,6 +797,21 @@ bool __init rdt_cpu_has(int flag) return ret; } +__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) +{ + if (!rdt_cpu_has(X86_FEATURE_BMEC)) + return false; + + switch (evt) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL); + case QOS_L3_MBM_LOCAL_EVENT_ID: + return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL); + default: + return false; + } +} + static __init bool get_mem_config(void) { struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA]; @@ -963,11 +890,6 @@ static __init bool get_rdt_mon_resources(void) if (!rdt_mon_features) return false; - if (is_mbm_local_enabled()) - mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; - else if (is_mbm_total_enabled()) - mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; - return !rdt_get_mon_l3_config(r); } @@ -1086,7 +1008,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) } } -static int __init resctrl_late_init(void) +static int __init resctrl_arch_late_init(void) { struct rdt_resource *r; int state, ret; @@ -1102,8 +1024,6 @@ static int __init resctrl_late_init(void) if (!get_rdt_resources()) return -ENODEV; - rdt_init_padding(); - state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/resctrl/cat:online:", resctrl_arch_online_cpu, @@ -1111,7 +1031,7 @@ static int __init resctrl_late_init(void) if (state < 0) return state; - ret = rdtgroup_init(); + ret = resctrl_init(); if (ret) { cpuhp_remove_state(state); return ret; @@ -1127,18 +1047,13 @@ static int __init resctrl_late_init(void) return 0; } -late_initcall(resctrl_late_init); +late_initcall(resctrl_arch_late_init); -static void __exit resctrl_exit(void) +static void __exit resctrl_arch_exit(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - cpuhp_remove_state(rdt_online); - rdtgroup_exit(); - - if (r->mon_capable) - rdt_put_mon_l3_config(); + resctrl_exit(); } -__exitcall(resctrl_exit); +__exitcall(resctrl_arch_exit); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 536351159cc2..0a0ac5f6112e 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,6 +23,15 @@ #include "internal.h" +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + +typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, + struct resctrl_schema *s, + struct rdt_ctrl_domain *d); + /* * Check whether MBA bandwidth percentage value is correct. The value is * checked against the minimum and max bandwidth values specified by the @@ -54,9 +63,9 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } - if (bw < r->membw.min_bw || bw > r->default_ctrl) { + if (bw < r->membw.min_bw || bw > r->membw.max_bw) { rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", - bw, r->membw.min_bw, r->default_ctrl); + bw, r->membw.min_bw, r->membw.max_bw); return false; } @@ -64,8 +73,8 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d) +static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -104,8 +113,9 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, */ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) { - unsigned long first_bit, zero_bit, val; + u32 supported_bits = BIT_MASK(r->cache.cbm_len) - 1; unsigned int cbm_len = r->cache.cbm_len; + unsigned long first_bit, zero_bit, val; int ret; ret = kstrtoul(buf, 16, &val); @@ -114,7 +124,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) return false; } - if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) { + if ((r->cache.min_cbm_bits > 0 && val == 0) || val > supported_bits) { rdt_last_cmd_puts("Mask out of range\n"); return false; } @@ -143,8 +153,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d) +static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, + struct rdt_ctrl_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -210,6 +220,7 @@ static int parse_line(char *line, struct resctrl_schema *s, struct rdtgroup *rdtgrp) { enum resctrl_conf_type t = s->conf_type; + ctrlval_parser_t *parse_ctrlval = NULL; struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; struct rdt_parse_data data; @@ -220,6 +231,18 @@ static int parse_line(char *line, struct resctrl_schema *s, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + parse_ctrlval = &parse_cbm; + break; + case RESCTRL_SCHEMA_RANGE: + parse_ctrlval = &parse_bw; + break; + } + + if (WARN_ON_ONCE(!parse_ctrlval)) + return -EINVAL; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) { rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); @@ -240,7 +263,7 @@ next: if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; - if (r->parse_ctrlval(&data, s, d)) + if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { cfg = &d->staged_config[t]; @@ -264,25 +287,12 @@ next: return -EINVAL; } -static u32 get_config_index(u32 closid, enum resctrl_conf_type type) -{ - switch (type) { - default: - case CDP_NONE: - return closid; - case CDP_CODE: - return closid * 2 + 1; - case CDP_DATA: - return closid * 2; - } -} - int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - u32 idx = get_config_index(closid, t); + u32 idx = resctrl_get_config_index(closid, t); struct msr_param msr_param; if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) @@ -319,7 +329,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) if (!cfg->have_new_ctrl) continue; - idx = get_config_index(closid, t); + idx = resctrl_get_config_index(closid, t); if (cfg->new_ctrl == hw_dom->ctrl_val[idx]) continue; hw_dom->ctrl_val[idx] = cfg->new_ctrl; @@ -439,7 +449,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); - u32 idx = get_config_index(closid, type); + u32 idx = resctrl_get_config_index(closid, type); return hw_dom->ctrl_val[idx]; } @@ -465,8 +475,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo ctrl_val = resctrl_arch_get_config(r, dom, closid, schema->conf_type); - seq_printf(s, r->format_str, dom->hdr.id, max_data_width, - ctrl_val); + seq_printf(s, schema->fmt_str, dom->hdr.id, ctrl_val); sep = true; } seq_puts(s, "\n"); @@ -537,12 +546,12 @@ ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of, rdt_last_cmd_clear(); if (!strcmp(buf, "mbm_local_bytes")) { - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; else ret = -EINVAL; } else if (!strcmp(buf, "mbm_total_bytes")) { - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID; else ret = -EINVAL; @@ -588,6 +597,28 @@ int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of, return ret; } +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos) +{ + struct rdt_domain_hdr *d; + struct list_head *l; + + list_for_each(l, h) { + d = list_entry(l, struct rdt_domain_hdr, list); + /* When id is found, return its domain. */ + if (id == d->id) + return d; + /* Stop searching when finding id's position in sorted list. */ + if (id < d->id) + break; + } + + if (pos) + *pos = l; + + return NULL; +} + void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first) @@ -649,7 +680,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) resid = md.u.rid; domid = md.u.domid; evtid = md.u.evtid; - r = &rdt_resources_all[resid].r_resctrl; + r = resctrl_arch_get_resource(resid); if (md.u.sum) { /* @@ -673,7 +704,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * This file provides data from a single domain. Search * the resource to find the domain with "domid". */ - hdr = rdt_find_domain(&r->mon_domains, domid, NULL); + hdr = resctrl_find_domain(&r->mon_domains, domid, NULL); if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { ret = -ENOENT; goto out; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 20c898f09b7e..eaae99602b61 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -32,30 +32,6 @@ */ #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) -/* Reads to Local DRAM Memory */ -#define READS_TO_LOCAL_MEM BIT(0) - -/* Reads to Remote DRAM Memory */ -#define READS_TO_REMOTE_MEM BIT(1) - -/* Non-Temporal Writes to Local Memory */ -#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) - -/* Non-Temporal Writes to Remote Memory */ -#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) - -/* Reads to Local Memory the system identifies as "Slow Memory" */ -#define READS_TO_LOCAL_S_MEM BIT(4) - -/* Reads to Remote Memory the system identifies as "Slow Memory" */ -#define READS_TO_REMOTE_S_MEM BIT(5) - -/* Dirty Victims to All Types of Memory */ -#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) - -/* Max event bits supported */ -#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) - /** * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that * aren't marked nohz_full @@ -180,7 +156,6 @@ struct rmid_read { void *arch_mon_ctx; }; -extern unsigned int rdt_mon_features; extern struct list_head resctrl_schema_all; extern bool resctrl_mounted; @@ -234,43 +209,6 @@ struct mongroup { }; /** - * struct pseudo_lock_region - pseudo-lock region information - * @s: Resctrl schema for the resource to which this - * pseudo-locked region belongs - * @d: RDT domain to which this pseudo-locked region - * belongs - * @cbm: bitmask of the pseudo-locked region - * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread - * completion - * @thread_done: variable used by waitqueue to test if pseudo-locking - * thread completed - * @cpu: core associated with the cache on which the setup code - * will be run - * @line_size: size of the cache lines - * @size: size of pseudo-locked region in bytes - * @kmem: the kernel memory associated with pseudo-locked region - * @minor: minor number of character device associated with this - * region - * @debugfs_dir: pointer to this region's directory in the debugfs - * filesystem - * @pm_reqs: Power management QoS requests related to this region - */ -struct pseudo_lock_region { - struct resctrl_schema *s; - struct rdt_ctrl_domain *d; - u32 cbm; - wait_queue_head_t lock_thread_wq; - int thread_done; - int cpu; - unsigned int line_size; - unsigned int size; - void *kmem; - unsigned int minor; - struct dentry *debugfs_dir; - struct list_head pm_reqs; -}; - -/** * struct rdtgroup - store rdtgroup's data in resctrl file system. * @kn: kernfs node * @rdtgroup_list: linked list for all rdtgroups @@ -326,10 +264,7 @@ struct rdtgroup { /* List of all resource groups */ extern struct list_head rdt_all_groups; -extern int max_name_width, max_data_width; - -int __init rdtgroup_init(void); -void __exit rdtgroup_exit(void); +extern int max_name_width; /** * struct rftype - describe each file in the resctrl file system @@ -433,37 +368,6 @@ struct msr_param { u32 high; }; -static inline bool is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - -static inline bool is_mbm_enabled(void) -{ - return (is_mbm_total_enabled() || is_mbm_local_enabled()); -} - -static inline bool is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - -struct rdt_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - /** * struct rdt_hw_resource - arch private attributes of a resctrl resource * @r_resctrl: Attributes of the resource used directly by resctrl. @@ -476,8 +380,6 @@ struct rdt_parse_data { * @msr_update: Function pointer to update QOS MSRs * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. - * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth - * Monitoring Event Configuration (BMEC) is supported. * @cdp_enabled: CDP state of this resource * * Members of this structure are either private to the architecture @@ -491,7 +393,6 @@ struct rdt_hw_resource { void (*msr_update)(struct msr_param *m); unsigned int mon_scale; unsigned int mbm_width; - unsigned int mbm_cfg_mask; bool cdp_enabled; }; @@ -500,36 +401,18 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r return container_of(r, struct rdt_hw_resource, r_resctrl); } -int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d); -int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_ctrl_domain *d); - extern struct mutex rdtgroup_mutex; +static inline const char *rdt_kn_name(const struct kernfs_node *kn) +{ + return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex)); +} + extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; extern enum resctrl_event_id mba_mbps_default_event; -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - -static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res) -{ - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res); - - hw_res++; - return &hw_res->r_resctrl; -} - static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) { return rdt_resources_all[l].cdp_enabled; @@ -539,27 +422,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); -/* - * To return the common struct rdt_resource, which is contained in struct - * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. - */ -#define for_each_rdt_resource(r) \ - for (r = &rdt_resources_all[0].r_resctrl; \ - r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \ - r = resctrl_inc(r)) - -#define for_each_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_capable || r->mon_capable) - -#define for_each_alloc_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_capable) - -#define for_each_mon_capable_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->mon_capable) - /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { struct { @@ -604,8 +466,6 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn); int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, umode_t mask); -struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, - struct list_head **pos); ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, @@ -620,28 +480,19 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); -int rdt_pseudo_lock_init(void); -void rdt_pseudo_lock_release(void); -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); -struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r); -struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); void closid_free(int closid); int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); -void __exit rdt_put_mon_l3_config(void); +void resctrl_mon_resource_exit(void); bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, cpumask_t *cpumask, int evtid, int first); +int __init resctrl_mon_resource_init(void); void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); @@ -658,4 +509,45 @@ void resctrl_file_fflags_init(const char *config, unsigned long fflags); void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); + +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); +int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); +int rdt_pseudo_lock_init(void); +void rdt_pseudo_lock_release(void); +int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); +void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); +#else +static inline int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm) +{ + return false; +} + +static inline bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) +{ + return false; +} + +static inline int rdt_pseudo_lock_init(void) { return 0; } +static inline void rdt_pseudo_lock_release(void) { } +static inline int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) +{ + return -EOPNOTSUPP; +} + +static inline void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) { } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ + #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 94a1d9780461..a93ed7d2a160 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -295,11 +295,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) memset(hw_dom->arch_mbm_local, 0, sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } @@ -365,7 +365,7 @@ static void limbo_release_entry(struct rmid_entry *entry) */ void __check_limbo(struct rdt_mon_domain *d, bool force_free) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); struct rmid_entry *entry; u32 idx, cur_idx = 1; @@ -521,7 +521,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_mon_domain *d; u32 idx; @@ -569,7 +569,7 @@ void free_rmid(u32 closid, u32 rmid) entry = __rmid_entry(idx); - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) add_rmid_to_limbo(entry); else list_add_tail(&entry->list, &rmid_free_lru); @@ -718,6 +718,22 @@ void mon_event_count(void *info) rr->err = 0; } +static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, + struct rdt_resource *r) +{ + struct rdt_ctrl_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + /* * Feedback loop for MBA software controller (mba_sc) * @@ -761,7 +777,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) struct rdtgroup *entry; u32 cur_bw, user_bw; - r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); evt_id = rgrp->mba_mbps_event; closid = rgrp->closid; @@ -852,10 +868,10 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, * This is protected from concurrent reads from user as both * the user and overflow handler hold the global mutex. */ - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); } @@ -925,7 +941,7 @@ void mbm_handle_overflow(struct work_struct *work) if (!resctrl_mounted || !resctrl_arch_mon_capable()) goto out_unlock; - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); d = container_of(work, struct rdt_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { @@ -1027,7 +1043,7 @@ static int dom_data_init(struct rdt_resource *r) /* * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and * are always allocated. These are used for the rdtgroup_default - * control group, which will be setup later in rdtgroup_init(). + * control group, which will be setup later in resctrl_init(). */ idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID); @@ -1040,10 +1056,13 @@ out_unlock: return err; } -static void __exit dom_data_exit(void) +static void dom_data_exit(struct rdt_resource *r) { mutex_lock(&rdtgroup_mutex); + if (!r->mon_capable) + goto out_unlock; + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { kfree(closid_num_dirty_rmid); closid_num_dirty_rmid = NULL; @@ -1052,6 +1071,7 @@ static void __exit dom_data_exit(void) kfree(rmid_ptrs); rmid_ptrs = NULL; +out_unlock: mutex_unlock(&rdtgroup_mutex); } @@ -1081,11 +1101,11 @@ static void l3_mon_evt_init(struct rdt_resource *r) { INIT_LIST_HEAD(&r->evt_list); - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (is_mbm_total_enabled()) + if (resctrl_arch_is_mbm_total_enabled()) list_add_tail(&mbm_total_event.list, &r->evt_list); - if (is_mbm_local_enabled()) + if (resctrl_arch_is_mbm_local_enabled()) list_add_tail(&mbm_local_event.list, &r->evt_list); } @@ -1172,12 +1192,56 @@ static __init int snc_get_config(void) return ret; } +/** + * resctrl_mon_resource_init() - Initialise global monitoring structures. + * + * Allocate and initialise global monitor resources that do not belong to a + * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists. + * Called once during boot after the struct rdt_resource's have been configured + * but before the filesystem is mounted. + * Resctrl's cpuhp callbacks may be called before this point to bring a domain + * online. + * + * Returns 0 for success, or -ENOMEM. + */ +int __init resctrl_mon_resource_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + int ret; + + if (!r->mon_capable) + return 0; + + ret = dom_data_init(r); + if (ret) + return ret; + + l3_mon_evt_init(r); + + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { + mbm_total_event.configurable = true; + resctrl_file_fflags_init("mbm_total_bytes_config", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + } + if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { + mbm_local_event.configurable = true; + resctrl_file_fflags_init("mbm_local_bytes_config", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + } + + if (resctrl_arch_is_mbm_local_enabled()) + mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; + else if (resctrl_arch_is_mbm_total_enabled()) + mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; + + return 0; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; - int ret; snc_nodes_per_l3_cache = snc_get_config(); @@ -1207,39 +1271,24 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); - ret = dom_data_init(r); - if (ret) - return ret; - if (rdt_cpu_has(X86_FEATURE_BMEC)) { u32 eax, ebx, ecx, edx; /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); - hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; - - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - mbm_total_event.configurable = true; - resctrl_file_fflags_init("mbm_total_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); - } - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - mbm_local_event.configurable = true; - resctrl_file_fflags_init("mbm_local_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); - } + r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - l3_mon_evt_init(r); - r->mon_capable = true; return 0; } -void __exit rdt_put_mon_l3_config(void) +void resctrl_mon_resource_exit(void) { - dom_data_exit(); + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + dom_data_exit(r); } void __init intel_rdt_mbm_apply_quirk(void) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 42cc162f7fc9..92ea1472bde9 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -52,7 +52,8 @@ static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) rdtgrp = dev_get_drvdata(dev); if (mode) *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); + guard(mutex)(&rdtgroup_mutex); + return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn)); } static const struct class pseudo_lock_class = { @@ -61,7 +62,8 @@ static const struct class pseudo_lock_class = { }; /** - * get_prefetch_disable_bits - prefetch disable bits of supported platforms + * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported + * platforms * @void: It takes no parameters. * * Capture the list of platforms that have been validated to support @@ -75,14 +77,16 @@ static const struct class pseudo_lock_class = { * in the SDM. * * When adding a platform here also add support for its cache events to - * measure_cycles_perf_fn() + * resctrl_arch_measure_l*_residency() * * Return: * If platform is supported, the bits to disable hardware prefetchers, 0 * if platform is not supported. */ -static u64 get_prefetch_disable_bits(void) +u64 resctrl_arch_get_prefetch_disable_bits(void) { + prefetch_disable_bits = 0; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) return 0; @@ -98,7 +102,8 @@ static u64 get_prefetch_disable_bits(void) * 3 DCU IP Prefetcher Disable (R/W) * 63:4 Reserved */ - return 0xF; + prefetch_disable_bits = 0xF; + break; case INTEL_ATOM_GOLDMONT: case INTEL_ATOM_GOLDMONT_PLUS: /* @@ -109,10 +114,11 @@ static u64 get_prefetch_disable_bits(void) * 2 DCU Hardware Prefetcher Disable (R/W) * 63:3 Reserved */ - return 0x5; + prefetch_disable_bits = 0x5; + break; } - return 0; + return prefetch_disable_bits; } /** @@ -408,8 +414,8 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) } /** - * pseudo_lock_fn - Load kernel memory into cache - * @_rdtgrp: resource group to which pseudo-lock region belongs + * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache + * @_plr: the pseudo-lock region descriptor * * This is the core pseudo-locking flow. * @@ -426,10 +432,9 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int pseudo_lock_fn(void *_rdtgrp) +int resctrl_arch_pseudo_lock_fn(void *_plr) { - struct rdtgroup *rdtgrp = _rdtgrp; - struct pseudo_lock_region *plr = rdtgrp->plr; + struct pseudo_lock_region *plr = _plr; u32 rmid_p, closid_p; unsigned long i; u64 saved_msr; @@ -489,7 +494,8 @@ static int pseudo_lock_fn(void *_rdtgrp) * pseudo-locked followed by reading of kernel memory to load it * into the cache. */ - __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid); + __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid); + /* * Cache was flushed earlier. Now access kernel memory to read it * into cache region associated with just activated plr->closid. @@ -712,8 +718,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * Not knowing the bits to disable prefetching implies that this * platform does not support Cache Pseudo-Locking. */ - prefetch_disable_bits = get_prefetch_disable_bits(); - if (prefetch_disable_bits == 0) { + if (resctrl_arch_get_prefetch_disable_bits() == 0) { rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; } @@ -872,7 +877,8 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) } /** - * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory + * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read + * pseudo-locked memory * @_plr: pseudo-lock region to measure * * There is no deterministic way to test if a memory region is cached. One @@ -885,7 +891,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int measure_cycles_lat_fn(void *_plr) +int resctrl_arch_measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; u32 saved_low, saved_high; @@ -1069,7 +1075,7 @@ out: return 0; } -static int measure_l2_residency(void *_plr) +int resctrl_arch_measure_l2_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1107,7 +1113,7 @@ out: return 0; } -static int measure_l3_residency(void *_plr) +int resctrl_arch_measure_l3_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1205,14 +1211,14 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) plr->cpu = cpu; if (sel == 1) - thread = kthread_run_on_cpu(measure_cycles_lat_fn, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn, + plr, cpu, "pseudo_lock_measure/%u"); else if (sel == 2) - thread = kthread_run_on_cpu(measure_l2_residency, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency, + plr, cpu, "pseudo_lock_measure/%u"); else if (sel == 3) - thread = kthread_run_on_cpu(measure_l3_residency, plr, - cpu, "pseudo_lock_measure/%u"); + thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency, + plr, cpu, "pseudo_lock_measure/%u"); else goto out; @@ -1293,6 +1299,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) struct task_struct *thread; unsigned int new_minor; struct device *dev; + char *kn_name __free(kfree) = NULL; int ret; ret = pseudo_lock_region_alloc(plr); @@ -1304,10 +1311,15 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) ret = -EINVAL; goto out_region; } + kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL); + if (!kn_name) { + ret = -ENOMEM; + goto out_cstates; + } plr->thread_done = 0; - thread = kthread_run_on_cpu(pseudo_lock_fn, rdtgrp, + thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr, plr->cpu, "pseudo_lock/%u"); if (IS_ERR(thread)) { ret = PTR_ERR(thread); @@ -1348,8 +1360,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) mutex_unlock(&rdtgroup_mutex); if (!IS_ERR_OR_NULL(debugfs_resctrl)) { - plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, - debugfs_resctrl); + plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl); if (!IS_ERR_OR_NULL(plr->debugfs_dir)) debugfs_create_file("pseudo_lock_measure", 0200, plr->debugfs_dir, rdtgrp, @@ -1358,7 +1369,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) dev = device_create(&pseudo_lock_class, NULL, MKDEV(pseudo_lock_major, new_minor), - rdtgrp, "%s", rdtgrp->kn->name); + rdtgrp, "%s", kn_name); mutex_lock(&rdtgroup_mutex); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6419e04d8a7b..cc4a54145c83 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -57,6 +57,12 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; +/* + * Used to store the max resource name width to display the schemata names in + * a tabular format. + */ +int max_name_width; + static struct seq_buf last_cmd_status; static char last_cmd_status_buf[512]; @@ -111,6 +117,18 @@ void rdt_staged_configs_clear(void) } } +static bool resctrl_is_mbm_enabled(void) +{ + return (resctrl_arch_is_mbm_total_enabled() || + resctrl_arch_is_mbm_local_enabled()); +} + +static bool resctrl_is_mbm_event(int e) +{ + return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && + e <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. @@ -157,7 +175,8 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && + resctrl_arch_is_llc_occupancy_enabled()) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; @@ -348,13 +367,13 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. */ -static void update_cpu_closid_rmid(void *info) +void resctrl_arch_sync_cpu_closid_rmid(void *info) { - struct rdtgroup *r = info; + struct resctrl_cpu_defaults *r = info; if (r) { this_cpu_write(pqr_state.default_closid, r->closid); - this_cpu_write(pqr_state.default_rmid, r->mon.rmid); + this_cpu_write(pqr_state.default_rmid, r->rmid); } /* @@ -369,11 +388,20 @@ static void update_cpu_closid_rmid(void *info) * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, * * Per task closids/rmids must have been set up before calling this function. + * @r may be NULL. */ static void update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) { - on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1); + struct resctrl_cpu_defaults defaults, *p = NULL; + + if (r) { + defaults.closid = r->closid; + defaults.rmid = r->mon.rmid; + p = &defaults; + } + + on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1); } static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, @@ -916,14 +944,14 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, continue; seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", - rdtg->kn->name); + rdt_kn_name(rdtg->kn)); seq_puts(s, "mon:"); list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, mon.crdtgrp_list) { if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, crg->mon.rmid)) continue; - seq_printf(s, "%s", crg->kn->name); + seq_printf(s, "%s", rdt_kn_name(crg->kn)); break; } seq_putc(s, '\n'); @@ -956,10 +984,20 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, return 0; } +static void *rdt_kn_parent_priv(struct kernfs_node *kn) +{ + /* + * The parent pointer is only valid within RCU section since it can be + * replaced. + */ + guard(rcu)(); + return rcu_dereference(kn->__parent)->priv; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%u\n", s->num_closid); return 0; @@ -968,17 +1006,17 @@ static int rdt_num_closids_show(struct kernfs_open_file *of, static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; - seq_printf(seq, "%x\n", r->default_ctrl); + seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r)); return 0; } static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); @@ -988,7 +1026,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); @@ -1012,7 +1050,7 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. @@ -1094,7 +1132,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); @@ -1104,7 +1142,7 @@ static int rdt_min_bw_show(struct kernfs_open_file *of, static int rdt_num_rmids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%d\n", r->num_rmid); @@ -1114,7 +1152,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, static int rdt_mon_features_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); struct mon_evt *mevt; list_for_each_entry(mevt, &r->evt_list, list) { @@ -1129,7 +1167,7 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); @@ -1139,7 +1177,7 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); @@ -1157,13 +1195,22 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; - if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) + switch (r->membw.throttle_mode) { + case THREAD_THROTTLE_PER_THREAD: seq_puts(seq, "per-thread\n"); - else + return 0; + case THREAD_THROTTLE_MAX: seq_puts(seq, "max\n"); + return 0; + case THREAD_THROTTLE_UNDEFINED: + seq_puts(seq, "undefined\n"); + return 0; + } + + WARN_ON_ONCE(1); return 0; } @@ -1222,7 +1269,7 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); @@ -1425,7 +1472,8 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, goto out; } rdtgrp->mode = RDT_MODE_EXCLUSIVE; - } else if (!strcmp(buf, "pseudo-locksetup")) { + } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) && + !strcmp(buf, "pseudo-locksetup")) { ret = rdtgroup_locksetup_enter(rdtgrp); if (ret) goto out; @@ -1552,11 +1600,6 @@ out: return ret; } -struct mon_config_info { - u32 evtid; - u32 mon_config; -}; - #define INVALID_CONFIG_INDEX UINT_MAX /** @@ -1581,31 +1624,32 @@ static inline unsigned int mon_event_config_index_get(u32 evtid) } } -static void mon_event_config_read(void *info) +void resctrl_arch_mon_event_config_read(void *_config_info) { - struct mon_config_info *mon_info = info; + struct resctrl_mon_config_info *config_info = _config_info; unsigned int index; u64 msrval; - index = mon_event_config_index_get(mon_info->evtid); + index = mon_event_config_index_get(config_info->evtid); if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; } rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval); /* Report only the valid event configuration bits */ - mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; + config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; } -static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info) +static void mondata_config_read(struct resctrl_mon_config_info *mon_info) { - smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1); + smp_call_function_any(&mon_info->d->hdr.cpu_mask, + resctrl_arch_mon_event_config_read, mon_info, 1); } static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { - struct mon_config_info mon_info; + struct resctrl_mon_config_info mon_info; struct rdt_mon_domain *dom; bool sep = false; @@ -1616,9 +1660,11 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (sep) seq_puts(s, ";"); - memset(&mon_info, 0, sizeof(struct mon_config_info)); + memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info)); + mon_info.r = r; + mon_info.d = dom; mon_info.evtid = evtid; - mondata_config_read(dom, &mon_info); + mondata_config_read(&mon_info); seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config); sep = true; @@ -1634,7 +1680,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid static int mbm_total_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); @@ -1644,37 +1690,39 @@ static int mbm_total_bytes_config_show(struct kernfs_open_file *of, static int mbm_local_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); return 0; } -static void mon_event_config_write(void *info) +void resctrl_arch_mon_event_config_write(void *_config_info) { - struct mon_config_info *mon_info = info; + struct resctrl_mon_config_info *config_info = _config_info; unsigned int index; - index = mon_event_config_index_get(mon_info->evtid); + index = mon_event_config_index_get(config_info->evtid); if (index == INVALID_CONFIG_INDEX) { - pr_warn_once("Invalid event id %d\n", mon_info->evtid); + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; } - wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); + wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0); } static void mbm_config_write_domain(struct rdt_resource *r, struct rdt_mon_domain *d, u32 evtid, u32 val) { - struct mon_config_info mon_info = {0}; + struct resctrl_mon_config_info mon_info = {0}; /* * Read the current config value first. If both are the same then * no need to write it again. */ + mon_info.r = r; + mon_info.d = d; mon_info.evtid = evtid; - mondata_config_read(d, &mon_info); + mondata_config_read(&mon_info); if (mon_info.mon_config == val) return; @@ -1686,7 +1734,7 @@ static void mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. */ - smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write, + smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write, &mon_info, 1); /* @@ -1703,7 +1751,6 @@ static void mbm_config_write_domain(struct rdt_resource *r, static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); char *dom_str = NULL, *id_str; unsigned long dom_id, val; struct rdt_mon_domain *d; @@ -1730,9 +1777,9 @@ next: } /* Value from user cannot be more than the supported set of events */ - if ((val & hw_res->mbm_cfg_mask) != val) { + if ((val & r->mbm_cfg_mask) != val) { rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - hw_res->mbm_cfg_mask); + r->mbm_cfg_mask); return -EINVAL; } @@ -1750,7 +1797,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -1776,7 +1823,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -2036,6 +2083,28 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name) return NULL; } +static void thread_throttle_mode_init(void) +{ + enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED; + struct rdt_resource *r_mba, *r_smba; + + r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + if (r_mba->alloc_capable && + r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_mba->membw.throttle_mode; + + r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA); + if (r_smba->alloc_capable && + r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_smba->membw.throttle_mode; + + if (throttle_mode == THREAD_THROTTLE_UNDEFINED) + return; + + resctrl_file_fflags_init("thread_throttle_mode", + RFTYPE_CTRL_INFO | RFTYPE_RES_MB); +} + void resctrl_file_fflags_init(const char *config, unsigned long fflags) { struct rftype *rft; @@ -2164,6 +2233,20 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, return ret; } +static unsigned long fflags_from_resource(struct rdt_resource *r) +{ + switch (r->rid) { + case RDT_RESOURCE_L3: + case RDT_RESOURCE_L2: + return RFTYPE_RES_CACHE; + case RDT_RESOURCE_MBA: + case RDT_RESOURCE_SMBA: + return RFTYPE_RES_MB; + } + + return WARN_ON_ONCE(1); +} + static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { struct resctrl_schema *s; @@ -2184,14 +2267,14 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - fflags = r->fflags | RFTYPE_CTRL_INFO; + fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } for_each_mon_capable_rdt_resource(r) { - fflags = r->fflags | RFTYPE_MON_INFO; + fflags = fflags_from_resource(r) | RFTYPE_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); if (ret) @@ -2255,7 +2338,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; + return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -2345,10 +2428,10 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, */ static bool supports_mba_mbps(void) { - struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); - return (is_mbm_enabled() && + return (resctrl_is_mbm_enabled() && r->alloc_capable && is_mba_linear() && r->ctrl_scope == rmbm->mon_scope); } @@ -2359,7 +2442,7 @@ static bool supports_mba_mbps(void) */ static int set_mba_sc(bool mba_sc) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); u32 num_closid = resctrl_arch_get_num_closid(r); struct rdt_ctrl_domain *d; unsigned long fflags; @@ -2440,12 +2523,13 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) * resource. "info" and its subdirectories don't * have rdtgroup structures, so return NULL here. */ - if (kn == kn_info || kn->parent == kn_info) + if (kn == kn_info || + rcu_access_pointer(kn->__parent) == kn_info) return NULL; else return kn->priv; } else { - return kn->parent->priv; + return rdt_kn_parent_priv(kn); } } @@ -2596,6 +2680,20 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type if (cl > max_name_width) max_name_width = cl; + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + s->fmt_str = "%d=%x"; + break; + case RESCTRL_SCHEMA_RANGE: + s->fmt_str = "%d=%u"; + break; + } + + if (WARN_ON_ONCE(!s->fmt_str)) { + kfree(s); + return -EINVAL; + } + INIT_LIST_HEAD(&s->list); list_add(&s->list, &resctrl_schema_all); @@ -2712,8 +2810,8 @@ static int rdt_get_tree(struct fs_context *fc) if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable()) resctrl_mounted = true; - if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + if (resctrl_is_mbm_enabled()) { + r = resctrl_arch_get_resource(RDT_RESOURCE_L3); list_for_each_entry(dom, &r->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); @@ -2823,7 +2921,7 @@ static int rdt_init_fs_context(struct fs_context *fc) return 0; } -static int reset_all_ctrls(struct rdt_resource *r) +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_ctrl_domain *hw_dom; @@ -2847,12 +2945,12 @@ static int reset_all_ctrls(struct rdt_resource *r) hw_dom = resctrl_to_arch_ctrl_dom(d); for (i = 0; i < hw_res->num_closid; i++) - hw_dom->ctrl_val[i] = r->default_ctrl; + hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r); msr_param.dom = d; smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } - return 0; + return; } /* @@ -2971,9 +3069,10 @@ static void rdt_kill_sb(struct super_block *sb) rdt_disable_ctx(); - /*Put everything back to default values. */ + /* Put everything back to default values. */ for_each_alloc_capable_rdt_resource(r) - reset_all_ctrls(r); + resctrl_arch_reset_all_ctrls(r); + rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; @@ -3080,7 +3179,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, if (ret) return ret; - if (!do_sum && is_mbm_event(mevt->evtid)) + if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); } @@ -3382,7 +3481,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) } cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = r->default_ctrl; + cfg->new_ctrl = resctrl_get_default_ctrl(r); cfg->have_new_ctrl = true; } } @@ -3454,6 +3553,22 @@ static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) free_rmid(rgrp->closid, rgrp->mon.rmid); } +/* + * We allow creating mon groups only with in a directory called "mon_groups" + * which is present in every ctrl_mon group. Check if this is a valid + * "mon_groups" directory. + * + * 1. The directory should be named "mon_groups". + * 2. The mon group itself should "not" be named "mon_groups". + * This makes sure "mon_groups" directory always has a ctrl_mon group + * as parent. + */ +static bool is_mon_groups(struct kernfs_node *kn, const char *name) +{ + return (!strcmp(rdt_kn_name(kn), "mon_groups") && + strcmp(name, "mon_groups")); +} + static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, const char *name, umode_t mode, enum rdt_group_type rtype, struct rdtgroup **r) @@ -3469,6 +3584,15 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, goto out_unlock; } + /* + * Check that the parent directory for a monitor group is a "mon_groups" + * directory. + */ + if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) { + ret = -EPERM; + goto out_unlock; + } + if (rtype == RDTMON_GROUP && (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { @@ -3652,22 +3776,6 @@ out_unlock: return ret; } -/* - * We allow creating mon groups only with in a directory called "mon_groups" - * which is present in every ctrl_mon group. Check if this is a valid - * "mon_groups" directory. - * - * 1. The directory should be named "mon_groups". - * 2. The mon group itself should "not" be named "mon_groups". - * This makes sure "mon_groups" directory always has a ctrl_mon group - * as parent. - */ -static bool is_mon_groups(struct kernfs_node *kn, const char *name) -{ - return (!strcmp(kn->name, "mon_groups") && - strcmp(name, "mon_groups")); -} - static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { @@ -3683,11 +3791,8 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); - /* - * If RDT monitoring is supported and the parent directory is a valid - * "mon_groups" directory, add a monitoring subdirectory. - */ - if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) + /* Else, attempt to add a monitoring subdirectory. */ + if (resctrl_arch_mon_capable()) return rdtgroup_mkdir_mon(parent_kn, name, mode); return -EPERM; @@ -3696,14 +3801,21 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + u32 closid, rmid; int cpu; /* Give any tasks back to the parent group */ rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); - /* Update per cpu rmid of the moved CPUs first */ + /* + * Update per cpu closid/rmid of the moved CPUs first. + * Note: the closid will not change, but the arch code still needs it. + */ + closid = prdtgrp->closid; + rmid = prdtgrp->mon.rmid; for_each_cpu(cpu, &rdtgrp->cpu_mask) - per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); + /* * Update the MSR on moved CPUs and CPUs which have moved * task running on them. @@ -3736,6 +3848,7 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { + u32 closid, rmid; int cpu; /* Give any tasks back to the default group */ @@ -3746,10 +3859,10 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); /* Update per cpu closid and rmid of the moved CPUs first */ - for_each_cpu(cpu, &rdtgrp->cpu_mask) { - per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; - per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; - } + closid = rdtgroup_default.closid; + rmid = rdtgroup_default.mon.rmid; + for_each_cpu(cpu, &rdtgrp->cpu_mask) + resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid); /* * Update the MSR on moved CPUs and CPUs which have moved @@ -3771,9 +3884,18 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) return 0; } +static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn) +{ + /* + * Valid within the RCU section it was obtained or while rdtgroup_mutex + * is held. + */ + return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex)); +} + static int rdtgroup_rmdir(struct kernfs_node *kn) { - struct kernfs_node *parent_kn = kn->parent; + struct kernfs_node *parent_kn; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; int ret = 0; @@ -3786,6 +3908,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = -EPERM; goto out; } + parent_kn = rdt_kn_parent(kn); /* * If the rdtgroup is a ctrl_mon group and parent directory @@ -3803,7 +3926,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); } } else if (rdtgrp->type == RDTMON_GROUP && - is_mon_groups(parent_kn, kn->name)) { + is_mon_groups(parent_kn, rdt_kn_name(kn))) { ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); } else { ret = -EPERM; @@ -3854,6 +3977,7 @@ static void mongrp_reparent(struct rdtgroup *rdtgrp, static int rdtgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { + struct kernfs_node *kn_parent; struct rdtgroup *new_prdtgrp; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; @@ -3888,8 +4012,9 @@ static int rdtgroup_rename(struct kernfs_node *kn, goto out; } - if (rdtgrp->type != RDTMON_GROUP || !kn->parent || - !is_mon_groups(kn->parent, kn->name)) { + kn_parent = rdt_kn_parent(kn); + if (rdtgrp->type != RDTMON_GROUP || !kn_parent || + !is_mon_groups(kn_parent, rdt_kn_name(kn))) { rdt_last_cmd_puts("Source must be a MON group\n"); ret = -EPERM; goto out; @@ -3950,7 +4075,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) + if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA))) seq_puts(seq, ",mba_MBps"); if (resctrl_debug) @@ -4029,9 +4154,9 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d if (resctrl_mounted && resctrl_arch_mon_capable()) rmdir_mondata_subdir_allrdtgrp(r, d); - if (is_mbm_enabled()) + if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); - if (is_llc_occupancy_enabled() && has_busy_rmid(d)) { + if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { /* * When a package is going down, forcefully * decrement rmid->ebusy. There is no way to know @@ -4049,17 +4174,30 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d mutex_unlock(&rdtgroup_mutex); } +/** + * domain_setup_mon_state() - Initialise domain monitoring structures. + * @r: The resource for the newly online domain. + * @d: The newly online domain. + * + * Allocate monitor resources that belong to this domain. + * Called when the first CPU of a domain comes online, regardless of whether + * the filesystem is mounted. + * During boot this may be called before global allocations have been made by + * resctrl_mon_resource_init(). + * + * Returns 0 for success, or -ENOMEM. + */ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; - if (is_llc_occupancy_enabled()) { + if (resctrl_arch_is_llc_occupancy_enabled()) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (is_mbm_total_enabled()) { + if (resctrl_arch_is_mbm_total_enabled()) { tsize = sizeof(*d->mbm_total); d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_total) { @@ -4067,7 +4205,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain return -ENOMEM; } } - if (is_mbm_local_enabled()) { + if (resctrl_arch_is_mbm_local_enabled()) { tsize = sizeof(*d->mbm_local); d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_local) { @@ -4106,13 +4244,13 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) if (err) goto out_unlock; - if (is_mbm_enabled()) { + if (resctrl_is_mbm_enabled()) { INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } - if (is_llc_occupancy_enabled()) + if (resctrl_arch_is_llc_occupancy_enabled()) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); /* @@ -4148,9 +4286,25 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) } } +static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, + struct rdt_resource *r) +{ + struct rdt_mon_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + void resctrl_offline_cpu(unsigned int cpu) { - struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; @@ -4167,12 +4321,12 @@ void resctrl_offline_cpu(unsigned int cpu) d = get_mon_domain_from_cpu(cpu, l3); if (d) { - if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { + if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0, cpu); } - if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && - has_busy_rmid(d)) { + if (resctrl_arch_is_llc_occupancy_enabled() && + cpu == d->cqm_work_cpu && has_busy_rmid(d)) { cancel_delayed_work(&d->cqm_limbo); cqm_setup_limbo_handler(d, 0, cpu); } @@ -4183,14 +4337,14 @@ out_unlock: } /* - * rdtgroup_init - rdtgroup initialization + * resctrl_init - resctrl filesystem initialization * * Setup resctrl file system including set up root, create mount point, - * register rdtgroup filesystem, and initialize files under root directory. + * register resctrl filesystem, and initialize files under root directory. * * Return: 0 on success or -errno */ -int __init rdtgroup_init(void) +int __init resctrl_init(void) { int ret = 0; @@ -4199,10 +4353,18 @@ int __init rdtgroup_init(void) rdtgroup_setup_default(); - ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + thread_throttle_mode_init(); + + ret = resctrl_mon_resource_init(); if (ret) return ret; + ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + if (ret) { + resctrl_mon_resource_exit(); + return ret; + } + ret = register_filesystem(&rdt_fs_type); if (ret) goto cleanup_mountpoint; @@ -4234,13 +4396,16 @@ int __init rdtgroup_init(void) cleanup_mountpoint: sysfs_remove_mount_point(fs_kobj, "resctrl"); + resctrl_mon_resource_exit(); return ret; } -void __exit rdtgroup_exit(void) +void __exit resctrl_exit(void) { debugfs_remove_recursive(debugfs_resctrl); unregister_filesystem(&rdt_fs_type); sysfs_remove_mount_point(fs_kobj, "resctrl"); + + resctrl_mon_resource_exit(); } |