Diffstat (limited to 'drivers/cxl')
-rw-r--r--  drivers/cxl/Kconfig        |  12
-rw-r--r--  drivers/cxl/acpi.c         |   4
-rw-r--r--  drivers/cxl/core/core.h    |   7
-rw-r--r--  drivers/cxl/core/hdm.c     |  25
-rw-r--r--  drivers/cxl/core/mbox.c    | 226
-rw-r--r--  drivers/cxl/core/memdev.c  |   1
-rw-r--r--  drivers/cxl/core/pci.c     |  12
-rw-r--r--  drivers/cxl/core/pmem.c    |  42
-rw-r--r--  drivers/cxl/core/port.c    |  92
-rw-r--r--  drivers/cxl/core/region.c  | 867
-rw-r--r--  drivers/cxl/core/trace.h   | 480
-rw-r--r--  drivers/cxl/cxl.h          |  73
-rw-r--r--  drivers/cxl/cxlmem.h       | 180
-rw-r--r--  drivers/cxl/cxlpci.h       |   6
-rw-r--r--  drivers/cxl/pci.c          | 239
-rw-r--r--  drivers/cxl/pmem.c         |  25
-rw-r--r--  drivers/cxl/port.c         | 112
17 files changed, 2195 insertions(+), 208 deletions(-)
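Before the patch body, a minimal sketch of how the new event-log API exported from core/mbox.c below is meant to be consumed at device-probe time. Only cxl_set_timestamp(), cxl_mem_get_event_records(), and the CXLDEV_EVENT_STATUS_* flags come from this patch; the helper name and the probe-time call site are assumptions for illustration (in the series proper the equivalent wiring lands in the pci.c changes, per the diffstat above).

	#include <cxlmem.h>
	#include <cxl.h>

	/*
	 * Hypothetical endpoint-driver hook; the function name is an
	 * assumption, not part of this patch. On probe, sync the device
	 * clock so event-record timestamps are meaningful, then drain
	 * and trace any records that accumulated before the driver
	 * loaded.
	 */
	static int example_cxl_event_init(struct cxl_dev_state *cxlds)
	{
		int rc;

		/*
		 * Set Timestamp is optional per the spec; the helper
		 * already treats "unsupported" as success, so any error
		 * returned here is a real mailbox failure.
		 */
		rc = cxl_set_timestamp(cxlds);
		if (rc)
			return rc;

		/* Read, trace, and clear all four event logs */
		cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_INFO |
						 CXLDEV_EVENT_STATUS_WARN |
						 CXLDEV_EVENT_STATUS_FAIL |
						 CXLDEV_EVENT_STATUS_FATAL);
		return 0;
	}

Each status bit maps to one log; cxl_mem_get_event_records() walks them in fatal-to-informational order, emitting the trace events defined in core/trace.h and clearing handles as it goes.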
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 9e709ecba50f..ff4e78117b31 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -104,12 +104,22 @@ config CXL_SUSPEND depends on SUSPEND && CXL_MEM config CXL_REGION - bool + bool "CXL: Region Support" default CXL_BUS # For MAX_PHYSMEM_BITS depends on SPARSEMEM select MEMREGION select GET_FREE_REGION + help + Enable the CXL core to enumerate and provision CXL regions. A CXL + region is defined by one or more CXL expanders that decode a given + system-physical address range. For CXL regions established by + platform-firmware this option enables memory error handling to + identify the devices participating in a given interleaved memory + range. Otherwise, platform-firmware managed CXL is enabled by being + placed in the system address map and does not need a driver. + + If unsure say 'y' config CXL_REGION_INVALIDATION_TEST bool "CXL: Region Cache Management Bypass (TEST)" diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 6927149f2a16..7e1765b09e04 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -731,9 +731,9 @@ static void __exit cxl_acpi_exit(void) cxl_bus_drain(); } -module_init(cxl_acpi_init); +/* load before dax_hmem sees 'Soft Reserved' CXL ranges */ +subsys_initcall(cxl_acpi_init); module_exit(cxl_acpi_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_IMPORT_NS(ACPI); -MODULE_SOFTDEP("pre: cxl_pmem"); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 8c04672dca56..cde475e13216 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -11,15 +11,18 @@ extern struct attribute_group cxl_base_attribute_group; #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; +extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; +extern const struct device_type cxl_dax_region_type; extern const struct device_type cxl_region_type; void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) #define CXL_REGION_TYPE(x) (&cxl_region_type) #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type) +#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type) int cxl_region_init(void); void cxl_region_exit(void); #else @@ -37,6 +40,7 @@ static inline void cxl_region_exit(void) #define CXL_REGION_TYPE(x) NULL #define SET_CXL_REGION_ATTR(x) #define CXL_PMEM_REGION_TYPE(x) NULL +#define CXL_DAX_REGION_TYPE(x) NULL #endif struct cxl_send_command; @@ -56,9 +60,6 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); extern struct rw_semaphore cxl_dpa_rwsem; -bool is_switch_decoder(struct device *dev); -struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); - int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index dcc16d7cb8f3..80eccae6ba9e 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -279,7 +279,7 @@ success: return 0; } -static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, +int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) { @@ -295,6 +295,7 @@ static int devm_cxl_dpa_reserve(struct 
cxl_endpoint_decoder *cxled, return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } +EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, CXL); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { @@ -676,6 +677,14 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) port->commit_end--; cxld->flags &= ~CXL_DECODER_F_ENABLE; + /* Userspace is now responsible for reconfiguring this decoder */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled; + + cxled = to_cxl_endpoint_decoder(&cxld->dev); + cxled->state = CXL_DECODER_STATE_MANUAL; + } + return 0; } @@ -783,6 +792,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return rc; } *dpa_base += dpa_size + skip; + + cxled->state = CXL_DECODER_STATE_AUTO; + return 0; } @@ -826,7 +838,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxled = cxl_endpoint_decoder_alloc(port); if (IS_ERR(cxled)) { dev_warn(&port->dev, - "Failed to allocate the decoder\n"); + "Failed to allocate decoder%d.%d\n", + port->id, i); return PTR_ERR(cxled); } cxld = &cxled->cxld; @@ -836,7 +849,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxlsd = cxl_switch_decoder_alloc(port, target_count); if (IS_ERR(cxlsd)) { dev_warn(&port->dev, - "Failed to allocate the decoder\n"); + "Failed to allocate decoder%d.%d\n", + port->id, i); return PTR_ERR(cxlsd); } cxld = &cxlsd->cxld; @@ -844,13 +858,16 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base); if (rc) { + dev_warn(&port->dev, + "Failed to initialize decoder%d.%d\n", + port->id, i); put_device(&cxld->dev); return rc; } rc = add_hdm_decoder(port, cxld, target_map); if (rc) { dev_warn(&port->dev, - "Failed to add decoder to port\n"); + "Failed to add decoder%d.%d\n", port->id, i); return rc; } } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fd3b13b0fb7f..fc7631bb1c24 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -3,11 +3,13 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/security.h> #include <linux/debugfs.h> +#include <linux/ktime.h> #include <linux/mutex.h> #include <cxlmem.h> #include <cxl.h> #include "core.h" +#include "trace.h" static bool cxl_raw_allow_all; @@ -742,6 +744,203 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +static const uuid_t gen_media_event_uuid = + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6); + +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +static const uuid_t dram_event_uuid = + UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24); + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +static const uuid_t mem_mod_event_uuid = + UUID_INIT(0xfe927475, 0xdd59, 0x4339, + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74); + +static void cxl_event_trace_record(const struct device *dev, + enum cxl_event_log_type type, + struct cxl_event_record_raw *record) +{ + uuid_t *id = &record->hdr.id; + + if (uuid_equal(id, &gen_media_event_uuid)) { + struct cxl_event_gen_media *rec = + (struct cxl_event_gen_media *)record; + + trace_cxl_general_media(dev, type, rec); + } else if (uuid_equal(id, &dram_event_uuid)) { + struct cxl_event_dram *rec = (struct cxl_event_dram *)record; + + trace_cxl_dram(dev, type, rec); + } else if (uuid_equal(id, 
&mem_mod_event_uuid)) { + struct cxl_event_mem_module *rec = + (struct cxl_event_mem_module *)record; + + trace_cxl_memory_module(dev, type, rec); + } else { + /* For unknown record types print just the header */ + trace_cxl_generic_event(dev, type, record); + } +} + +static int cxl_clear_event_record(struct cxl_dev_state *cxlds, + enum cxl_event_log_type log, + struct cxl_get_event_payload *get_pl) +{ + struct cxl_mbox_clear_event_payload *payload; + u16 total = le16_to_cpu(get_pl->record_count); + u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES; + size_t pl_size = struct_size(payload, handles, max_handles); + struct cxl_mbox_cmd mbox_cmd; + u16 cnt; + int rc = 0; + int i; + + /* Payload size may limit the max handles */ + if (pl_size > cxlds->payload_size) { + max_handles = (cxlds->payload_size - sizeof(*payload)) / + sizeof(__le16); + pl_size = struct_size(payload, handles, max_handles); + } + + payload = kvzalloc(pl_size, GFP_KERNEL); + if (!payload) + return -ENOMEM; + + *payload = (struct cxl_mbox_clear_event_payload) { + .event_log = log, + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD, + .payload_in = payload, + .size_in = pl_size, + }; + + /* + * Clear Event Records uses u8 for the handle cnt while Get Event + * Record can return up to 0xffff records. + */ + i = 0; + for (cnt = 0; cnt < total; cnt++) { + payload->handles[i++] = get_pl->records[cnt].hdr.handle; + dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n", + log, le16_to_cpu(payload->handles[i])); + + if (i == max_handles) { + payload->nr_recs = i; + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + goto free_pl; + i = 0; + } + } + + /* Clear what is left if any */ + if (i) { + payload->nr_recs = i; + mbox_cmd.size_in = struct_size(payload, handles, i); + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + goto free_pl; + } + +free_pl: + kvfree(payload); + return rc; +} + +static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, + enum cxl_event_log_type type) +{ + struct cxl_get_event_payload *payload; + struct cxl_mbox_cmd mbox_cmd; + u8 log_type = type; + u16 nr_rec; + + mutex_lock(&cxlds->event.log_lock); + payload = cxlds->event.buf; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, + .payload_in = &log_type, + .size_in = sizeof(log_type), + .payload_out = payload, + .size_out = cxlds->payload_size, + .min_out = struct_size(payload, records, 0), + }; + + do { + int rc, i; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) { + dev_err_ratelimited(cxlds->dev, + "Event log '%d': Failed to query event records : %d", + type, rc); + break; + } + + nr_rec = le16_to_cpu(payload->record_count); + if (!nr_rec) + break; + + for (i = 0; i < nr_rec; i++) + cxl_event_trace_record(cxlds->dev, type, + &payload->records[i]); + + if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) + trace_cxl_overflow(cxlds->dev, type, payload); + + rc = cxl_clear_event_record(cxlds, type, payload); + if (rc) { + dev_err_ratelimited(cxlds->dev, + "Event log '%d': Failed to clear events : %d", + type, rc); + break; + } + } while (nr_rec); + + mutex_unlock(&cxlds->event.log_lock); +} + +/** + * cxl_mem_get_event_records - Get Event Records from the device + * @cxlds: The device data for the operation + * @status: Event Status register value identifying which events are available. + * + * Retrieve all event records available on the device, report them as trace + * events, and clear them. 
+ * + * See CXL rev 3.0 @8.2.9.2.2 Get Event Records + * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records + */ +void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status) +{ + dev_dbg(cxlds->dev, "Reading event logs: %x\n", status); + + if (status & CXLDEV_EVENT_STATUS_FATAL) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL); + if (status & CXLDEV_EVENT_STATUS_FAIL) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); + if (status & CXLDEV_EVENT_STATUS_WARN) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); + if (status & CXLDEV_EVENT_STATUS_INFO) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); + /** * cxl_mem_get_partition_info - Get partition info * @cxlds: The device data for the operation @@ -882,6 +1081,32 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); +int cxl_set_timestamp(struct cxl_dev_state *cxlds) +{ + struct cxl_mbox_cmd mbox_cmd; + struct cxl_mbox_set_timestamp_in pi; + int rc; + + pi.timestamp = cpu_to_le64(ktime_get_real_ns()); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_TIMESTAMP, + .size_in = sizeof(pi), + .payload_in = &pi, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + /* + * Command is optional. Devices may have another way of providing + * a timestamp, or may return all 0s in timestamp fields. + * Don't report an error if this command isn't supported + */ + if (rc && (mbox_cmd.return_code != CXL_MBOX_CMD_RC_UNSUPPORTED)) + return rc; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL); + struct cxl_dev_state *cxl_dev_state_create(struct device *dev) { struct cxl_dev_state *cxlds; @@ -893,6 +1118,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev) } mutex_init(&cxlds->mbox_mutex); + mutex_init(&cxlds->event.log_lock); cxlds->dev = dev; return cxlds; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 12bd9ddaba22..0af8856936dc 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -246,6 +246,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, if (rc < 0) goto err; cxlmd->id = rc; + cxlmd->depth = -1; dev = &cxlmd->dev; device_initialize(dev); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 1d1492440287..2b463f107cb5 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -214,11 +214,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) return devm_add_action_or_reset(host, clear_mem_enable, cxlds); } -static bool range_contains(struct range *r1, struct range *r2) -{ - return r1->start <= r2->start && r1->end >= r2->end; -} - /* require dvsec ranges to be covered by a locked platform window */ static int dvsec_range_allowed(struct device *dev, void *arg) { @@ -684,8 +679,11 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { - addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr))); + void __iomem *rcc_addr = + cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); } else { fe = status; } diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index f3d2169b6731..c2e4b1093788 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -227,34 +227,16 @@ static struct cxl_nvdimm 
*cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb, return cxl_nvd; } -static void cxl_nvd_unregister(void *_cxl_nvd) +static void cxlmd_release_nvdimm(void *_cxlmd) { - struct cxl_nvdimm *cxl_nvd = _cxl_nvd; - struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_memdev *cxlmd = _cxlmd; + struct cxl_nvdimm *cxl_nvd = cxlmd->cxl_nvd; struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb; - /* - * Either the bridge is in ->remove() context under the device_lock(), - * or cxlmd_release_nvdimm() is cancelling the bridge's release action - * for @cxl_nvd and doing it itself (while manually holding the bridge - * lock). - */ - device_lock_assert(&cxl_nvb->dev); cxl_nvd->cxlmd = NULL; cxlmd->cxl_nvd = NULL; + cxlmd->cxl_nvb = NULL; device_unregister(&cxl_nvd->dev); -} - -static void cxlmd_release_nvdimm(void *_cxlmd) -{ - struct cxl_memdev *cxlmd = _cxlmd; - struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb; - - device_lock(&cxl_nvb->dev); - if (cxlmd->cxl_nvd) - devm_release_action(&cxl_nvb->dev, cxl_nvd_unregister, - cxlmd->cxl_nvd); - device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); } @@ -293,22 +275,6 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd) dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev)); - /* - * The two actions below arrange for @cxl_nvd to be deleted when either - * the top-level PMEM bridge goes down, or the endpoint device goes - * through ->remove(). - */ - device_lock(&cxl_nvb->dev); - if (cxl_nvb->dev.driver) - rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister, - cxl_nvd); - else - rc = -ENXIO; - device_unlock(&cxl_nvb->dev); - - if (rc) - goto err_alloc; - /* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */ return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 609aa6801b14..724190418698 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -46,6 +46,8 @@ static int cxl_device_id(struct device *dev) return CXL_DEVICE_NVDIMM; if (dev->type == CXL_PMEM_REGION_TYPE()) return CXL_DEVICE_PMEM_REGION; + if (dev->type == CXL_DAX_REGION_TYPE()) + return CXL_DEVICE_DAX_REGION; if (is_cxl_port(dev)) { if (is_cxl_root(to_cxl_port(dev))) return CXL_DEVICE_ROOT; @@ -180,17 +182,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - switch (cxled->mode) { - case CXL_DECODER_RAM: - return sysfs_emit(buf, "ram\n"); - case CXL_DECODER_PMEM: - return sysfs_emit(buf, "pmem\n"); - case CXL_DECODER_NONE: - return sysfs_emit(buf, "none\n"); - case CXL_DECODER_MIXED: - default: - return sysfs_emit(buf, "mixed\n"); - } + return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode)); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, @@ -304,6 +296,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_type3.attr, &dev_attr_target_list.attr, SET_CXL_REGION_ATTR(create_pmem_region) + SET_CXL_REGION_ATTR(create_ram_region) SET_CXL_REGION_ATTR(delete_region) NULL, }; @@ -315,6 +308,13 @@ static bool can_create_pmem(struct cxl_root_decoder *cxlrd) return (cxlrd->cxlsd.cxld.flags & flags) == flags; } +static bool can_create_ram(struct cxl_root_decoder *cxlrd) +{ + unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_RAM; + + return (cxlrd->cxlsd.cxld.flags & flags) == flags; +} + static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = 
kobj_to_dev(kobj); @@ -323,7 +323,11 @@ static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute * if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd)) return 0; - if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd)) + if (a == CXL_REGION_ATTR(create_ram_region) && !can_create_ram(cxlrd)) + return 0; + + if (a == CXL_REGION_ATTR(delete_region) && + !(can_create_pmem(cxlrd) || can_create_ram(cxlrd))) return 0; return a->mode; @@ -444,6 +448,7 @@ bool is_endpoint_decoder(struct device *dev) { return dev->type == &cxl_decoder_endpoint_type; } +EXPORT_SYMBOL_NS_GPL(is_endpoint_decoder, CXL); bool is_root_decoder(struct device *dev) { @@ -455,6 +460,7 @@ bool is_switch_decoder(struct device *dev) { return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type; } +EXPORT_SYMBOL_NS_GPL(is_switch_decoder, CXL); struct cxl_decoder *to_cxl_decoder(struct device *dev) { @@ -482,6 +488,7 @@ struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev) return NULL; return container_of(dev, struct cxl_switch_decoder, cxld.dev); } +EXPORT_SYMBOL_NS_GPL(to_cxl_switch_decoder, CXL); static void cxl_ep_release(struct cxl_ep *ep) { @@ -1207,6 +1214,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) get_device(&endpoint->dev); dev_set_drvdata(dev, endpoint); + cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); @@ -1241,50 +1249,55 @@ static void reap_dports(struct cxl_port *port) } } +struct detach_ctx { + struct cxl_memdev *cxlmd; + int depth; +}; + +static int port_has_memdev(struct device *dev, const void *data) +{ + const struct detach_ctx *ctx = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + if (port->depth != ctx->depth) + return 0; + + return !!cxl_ep_load(port, ctx->cxlmd); +} + static void cxl_detach_ep(void *data) { struct cxl_memdev *cxlmd = data; - struct device *iter; - for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) { - struct device *dport_dev = grandparent(iter); + for (int i = cxlmd->depth - 1; i >= 1; i--) { struct cxl_port *port, *parent_port; + struct detach_ctx ctx = { + .cxlmd = cxlmd, + .depth = i, + }; + struct device *dev; struct cxl_ep *ep; bool died = false; - if (!dport_dev) - break; - - port = find_cxl_port(dport_dev, NULL); - if (!port) - continue; - - if (is_cxl_root(port)) { - put_device(&port->dev); + dev = bus_find_device(&cxl_bus_type, NULL, &ctx, + port_has_memdev); + if (!dev) continue; - } + port = to_cxl_port(dev); parent_port = to_cxl_port(port->dev.parent); device_lock(&parent_port->dev); - if (!parent_port->dev.driver) { - /* - * The bottom-up race to delete the port lost to a - * top-down port disable, give up here, because the - * parent_port ->remove() will have cleaned up all - * descendants. - */ - device_unlock(&parent_port->dev); - put_device(&port->dev); - continue; - } - device_lock(&port->dev); ep = cxl_ep_load(port, cxlmd); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); cxl_ep_remove(port, ep); if (ep && !port->dead && xa_empty(&port->endpoints) && - !is_cxl_root(parent_port)) { + !is_cxl_root(parent_port) && parent_port->dev.driver) { /* * This was the last ep attached to a dynamically * enumerated port. 
Block new cxl_add_ep() and garbage @@ -1620,6 +1633,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, } cxlrd->calc_hb = calc_hb; + mutex_init(&cxlrd->range_lock); cxld = &cxlsd->cxld; cxld->dev.type = &cxl_decoder_root_type; @@ -2003,6 +2017,6 @@ static void cxl_core_exit(void) debugfs_remove_recursive(cxl_debugfs); } -module_init(cxl_core_init); +subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3482a9e6d2f2..f29028148806 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -6,6 +6,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/uuid.h> +#include <linux/sort.h> #include <linux/idr.h> #include <cxlmem.h> #include <cxl.h> @@ -45,7 +46,10 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; - rc = sysfs_emit(buf, "%pUb\n", &p->uuid); + if (cxlr->mode != CXL_DECODER_PMEM) + rc = sysfs_emit(buf, "\n"); + else + rc = sysfs_emit(buf, "%pUb\n", &p->uuid); up_read(&cxl_region_rwsem); return rc; @@ -131,7 +135,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *iter = cxled_to_port(cxled); struct cxl_ep *ep; - int rc; + int rc = 0; while (!is_cxl_root(to_cxl_port(iter->dev.parent))) iter = to_cxl_port(iter->dev.parent); @@ -143,7 +147,8 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - rc = cxld->reset(cxld); + if (cxld->reset) + rc = cxld->reset(cxld); if (rc) return rc; } @@ -201,7 +206,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - cxld->reset(cxld); + if (cxld->reset) + cxld->reset(cxld); } cxled->cxld.reset(&cxled->cxld); @@ -300,8 +306,12 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, struct device *dev = kobj_to_dev(kobj); struct cxl_region *cxlr = to_cxl_region(dev); + /* + * Support tooling that expects to find a 'uuid' attribute for all + * regions regardless of mode. + */ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) - return 0; + return 0444; return a->mode; } @@ -458,6 +468,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(resource); +static ssize_t mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + + return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode)); +} +static DEVICE_ATTR_RO(mode); + static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); @@ -508,7 +527,12 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr) if (device_is_registered(&cxlr->dev)) lockdep_assert_held_write(&cxl_region_rwsem); if (p->res) { - remove_resource(p->res); + /* + * Autodiscovered regions may not have been able to insert their + * resource. 
+ */ + if (p->res->parent) + remove_resource(p->res); kfree(p->res); p->res = NULL; } @@ -585,6 +609,7 @@ static struct attribute *cxl_region_attrs[] = { &dev_attr_interleave_granularity.attr, &dev_attr_resource.attr, &dev_attr_size.attr, + &dev_attr_mode.attr, NULL, }; @@ -1006,10 +1031,10 @@ static int cxl_port_setup_targets(struct cxl_port *port, int i, distance; /* - * Passthrough ports impose no distance requirements between + * Passthrough decoders impose no distance requirements between * peers */ - if (port->nr_dports == 1) + if (cxl_rr->nr_targets == 1) distance = 0; else distance = p->nr_targets / cxl_rr->nr_targets; @@ -1088,12 +1113,35 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } - cxld->interleave_ways = iw; - cxld->interleave_granularity = ig; - cxld->hpa_range = (struct range) { - .start = p->res->start, - .end = p->res->end, - }; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + if (cxld->interleave_ways != iw || + cxld->interleave_granularity != ig || + cxld->hpa_range.start != p->res->start || + cxld->hpa_range.end != p->res->end || + ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { + dev_err(&cxlr->dev, + "%s:%s %s expected iw: %d ig: %d %pr\n", + dev_name(port->uport), dev_name(&port->dev), + __func__, iw, ig, p->res); + dev_err(&cxlr->dev, + "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n", + dev_name(port->uport), dev_name(&port->dev), + __func__, cxld->interleave_ways, + cxld->interleave_granularity, + (cxld->flags & CXL_DECODER_F_ENABLE) ? + "enabled" : + "disabled", + cxld->hpa_range.start, cxld->hpa_range.end); + return -ENXIO; + } + } else { + cxld->interleave_ways = iw; + cxld->interleave_granularity = ig; + cxld->hpa_range = (struct range) { + .start = p->res->start, + .end = p->res->end, + }; + } dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport), dev_name(&port->dev), iw, ig); add_target: @@ -1104,7 +1152,17 @@ add_target: dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); return -ENXIO; } - cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) { + dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxlsd->cxld.dev), + dev_name(ep->dport->dport), + cxl_rr->nr_targets_set); + return -ENXIO; + } + } else + cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; inc = 1; out_target_set: cxl_rr->nr_targets_set += inc; @@ -1146,6 +1204,13 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) struct cxl_ep *ep; int i; + /* + * In the auto-discovery case skip automatic teardown since the + * address space is already active + */ + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + return; + for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); @@ -1178,8 +1243,8 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent); /* - * Descend the topology tree programming targets while - * looking for conflicts. + * Descend the topology tree programming / validating + * targets while looking for conflicts. 
*/ for (ep = cxl_ep_load(iter, cxlmd); iter; iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { @@ -1194,29 +1259,13 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) return 0; } -static int cxl_region_attach(struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled, int pos) +static int cxl_region_validate_position(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + int pos) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *ep_port, *root_port, *iter; struct cxl_region_params *p = &cxlr->params; - struct cxl_dport *dport; - int i, rc = -ENXIO; - - if (cxled->mode == CXL_DECODER_DEAD) { - dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); - return -ENODEV; - } - - /* all full of members, or interleave config not established? */ - if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { - dev_dbg(&cxlr->dev, "region already active\n"); - return -EBUSY; - } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { - dev_dbg(&cxlr->dev, "interleave config missing\n"); - return -ENXIO; - } + int i; if (pos < 0 || pos >= p->interleave_ways) { dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, @@ -1255,6 +1304,256 @@ static int cxl_region_attach(struct cxl_region *cxlr, } } + return 0; +} + +static int cxl_region_attach_position(struct cxl_region *cxlr, + struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled, + const struct cxl_dport *dport, int pos) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *iter; + int rc; + + if (cxlrd->calc_hb(cxlrd, pos) != dport) { + dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + dev_name(&cxlrd->cxlsd.cxld.dev)); + return -ENXIO; + } + + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) { + rc = cxl_port_attach_region(iter, cxlr, cxled, pos); + if (rc) + goto err; + } + + return 0; + +err: + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) + cxl_port_detach_region(iter, cxlr, cxled); + return rc; +} + +static int cxl_region_attach_auto(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_region_params *p = &cxlr->params; + + if (cxled->state != CXL_DECODER_STATE_AUTO) { + dev_err(&cxlr->dev, + "%s: unable to add decoder to autodetected region\n", + dev_name(&cxled->cxld.dev)); + return -EINVAL; + } + + if (pos >= 0) { + dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n", + dev_name(&cxled->cxld.dev), pos); + return -EINVAL; + } + + if (p->nr_targets >= p->interleave_ways) { + dev_err(&cxlr->dev, "%s: no more target slots available\n", + dev_name(&cxled->cxld.dev)); + return -ENXIO; + } + + /* + * Temporarily record the endpoint decoder into the target array. Yes, + * this means that userspace can view devices in the wrong position + * before the region activates, and must be careful to understand when + * it might be racing region autodiscovery. 
+ */ + pos = p->nr_targets; + p->targets[pos] = cxled; + cxled->pos = pos; + p->nr_targets++; + + return 0; +} + +static struct cxl_port *next_port(struct cxl_port *port) +{ + if (!port->parent_dport) + return NULL; + return port->parent_dport->port; +} + +static int decoder_match_range(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled = data; + struct cxl_switch_decoder *cxlsd; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range); +} + +static void find_positions(const struct cxl_switch_decoder *cxlsd, + const struct cxl_port *iter_a, + const struct cxl_port *iter_b, int *a_pos, + int *b_pos) +{ + int i; + + for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) { + if (cxlsd->target[i] == iter_a->parent_dport) + *a_pos = i; + else if (cxlsd->target[i] == iter_b->parent_dport) + *b_pos = i; + if (*a_pos >= 0 && *b_pos >= 0) + break; + } +} + +static int cmp_decode_pos(const void *a, const void *b) +{ + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; + struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); + struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); + struct cxl_port *port_a = cxled_to_port(cxled_a); + struct cxl_port *port_b = cxled_to_port(cxled_b); + struct cxl_port *iter_a, *iter_b, *port = NULL; + struct cxl_switch_decoder *cxlsd; + struct device *dev; + int a_pos, b_pos; + unsigned int seq; + + /* Exit early if any prior sorting failed */ + if (cxled_a->pos < 0 || cxled_b->pos < 0) + return 0; + + /* + * Walk up the hierarchy to find a shared port, find the decoder that + * maps the range, compare the relative position of those dport + * mappings. + */ + for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { + struct cxl_port *next_a, *next_b; + + next_a = next_port(iter_a); + if (!next_a) + break; + + for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { + next_b = next_port(iter_b); + if (next_a != next_b) + continue; + port = next_a; + break; + } + + if (port) + break; + } + + if (!port) { + dev_err(cxlmd_a->dev.parent, + "failed to find shared port with %s\n", + dev_name(cxlmd_b->dev.parent)); + goto err; + } + + dev = device_find_child(&port->dev, cxled_a, decoder_match_range); + if (!dev) { + struct range *range = &cxled_a->cxld.hpa_range; + + dev_err(port->uport, + "failed to find decoder that maps %#llx-%#llx\n", + range->start, range->end); + goto err; + } + + cxlsd = to_cxl_switch_decoder(dev); + do { + seq = read_seqbegin(&cxlsd->target_lock); + find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); + } while (read_seqretry(&cxlsd->target_lock, seq)); + + put_device(dev); + + if (a_pos < 0 || b_pos < 0) { + dev_err(port->uport, + "failed to find shared decoder for %s and %s\n", + dev_name(cxlmd_a->dev.parent), + dev_name(cxlmd_b->dev.parent)); + goto err; + } + + dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent), + a_pos - b_pos < 0 ? 
"before" : "after", + dev_name(cxlmd_b->dev.parent)); + + return a_pos - b_pos; +err: + cxled_a->pos = -1; + return 0; +} + +static int cxl_region_sort_targets(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + int i, rc = 0; + + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, + NULL); + + for (i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + + /* + * Record that sorting failed, but still continue to restore + * cxled->pos with its ->targets[] position so that follow-on + * code paths can reliably do p->targets[cxled->pos] to + * self-reference their entry. + */ + if (cxled->pos < 0) + rc = -ENXIO; + cxled->pos = i; + } + + dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); + return rc; +} + +static int cxl_region_attach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_region_params *p = &cxlr->params; + struct cxl_port *ep_port, *root_port; + struct cxl_dport *dport; + int rc = -ENXIO; + + if (cxled->mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", + dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); + return -EINVAL; + } + + if (cxled->mode == CXL_DECODER_DEAD) { + dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); + return -ENODEV; + } + + /* all full of members, or interleave config not established? */ + if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_dbg(&cxlr->dev, "region already active\n"); + return -EBUSY; + } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_dbg(&cxlr->dev, "interleave config missing\n"); + return -ENXIO; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1265,13 +1564,6 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } - if (cxlrd->calc_hb(cxlrd, pos) != dport) { - dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), - dev_name(&cxlrd->cxlsd.cxld.dev)); - return -ENXIO; - } - if (cxled->cxld.target_type != cxlr->type) { dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), @@ -1295,13 +1587,58 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } - for (iter = ep_port; !is_cxl_root(iter); - iter = to_cxl_port(iter->dev.parent)) { - rc = cxl_port_attach_region(iter, cxlr, cxled, pos); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + int i; + + rc = cxl_region_attach_auto(cxlr, cxled, pos); if (rc) - goto err; + return rc; + + /* await more targets to arrive... */ + if (p->nr_targets < p->interleave_ways) + return 0; + + /* + * All targets are here, which implies all PCI enumeration that + * affects this region has been completed. Walk the topology to + * sort the devices into their relative region decode position. 
+ */ + rc = cxl_region_sort_targets(cxlr); + if (rc) + return rc; + + for (i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + ep_port = cxled_to_port(cxled); + dport = cxl_find_dport_by_dev(root_port, + ep_port->host_bridge); + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, + dport, i); + if (rc) + return rc; + } + + rc = cxl_region_setup_targets(cxlr); + if (rc) + return rc; + + /* + * If target setup succeeds in the autodiscovery case + * then the region is already committed. + */ + p->state = CXL_CONFIG_COMMIT; + + return 0; } + rc = cxl_region_validate_position(cxlr, cxled, pos); + if (rc) + return rc; + + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos); + if (rc) + return rc; + p->targets[pos] = cxled; cxled->pos = pos; p->nr_targets++; @@ -1324,10 +1661,8 @@ static int cxl_region_attach(struct cxl_region *cxlr, err_decrement: p->nr_targets--; -err: - for (iter = ep_port; !is_cxl_root(iter); - iter = to_cxl_port(iter->dev.parent)) - cxl_port_detach_region(iter, cxlr, cxled); + cxled->pos = -1; + p->targets[pos] = NULL; return rc; } @@ -1399,31 +1734,25 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) up_write(&cxl_region_rwsem); } -static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos) +static int attach_target(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + unsigned int state) { - struct device *dev; - int rc; - - dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder); - if (!dev) - return -ENODEV; - - if (!is_endpoint_decoder(dev)) { - put_device(dev); - return -EINVAL; - } + int rc = 0; - rc = down_write_killable(&cxl_region_rwsem); + if (state == TASK_INTERRUPTIBLE) + rc = down_write_killable(&cxl_region_rwsem); + else + down_write(&cxl_region_rwsem); if (rc) - goto out; + return rc; + down_read(&cxl_dpa_rwsem); - rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos); + rc = cxl_region_attach(cxlr, cxled, pos); if (rc == 0) set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); up_read(&cxl_dpa_rwsem); up_write(&cxl_region_rwsem); -out: - put_device(dev); return rc; } @@ -1461,8 +1790,23 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, if (sysfs_streq(buf, "\n")) rc = detach_target(cxlr, pos); - else - rc = attach_target(cxlr, buf, pos); + else { + struct device *dev; + + dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf); + if (!dev) + return -ENODEV; + + if (!is_endpoint_decoder(dev)) { + rc = -EINVAL; + goto out; + } + + rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos, + TASK_INTERRUPTIBLE); +out: + put_device(dev); + } if (rc < 0) return rc; @@ -1666,6 +2010,15 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, struct device *dev; int rc; + switch (mode) { + case CXL_DECODER_RAM: + case CXL_DECODER_PMEM: + break; + default: + dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); + return ERR_PTR(-EINVAL); + } + cxlr = cxl_region_alloc(cxlrd, id); if (IS_ERR(cxlr)) return cxlr; @@ -1694,12 +2047,38 @@ err: return ERR_PTR(rc); } +static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) +{ + return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); +} + static ssize_t create_pmem_region_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + return __create_region_show(to_cxl_root_decoder(dev), buf); +} - return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); 
+static ssize_t create_ram_region_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __create_region_show(to_cxl_root_decoder(dev), buf); +} + +static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, + enum cxl_decoder_mode mode, int id) +{ + int rc; + + rc = memregion_alloc(GFP_KERNEL); + if (rc < 0) + return ERR_PTR(rc); + + if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) { + memregion_free(rc); + return ERR_PTR(-EBUSY); + } + + return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER); } static ssize_t create_pmem_region_store(struct device *dev, @@ -1708,29 +2087,39 @@ static ssize_t create_pmem_region_store(struct device *dev, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; - int id, rc; + int rc, id; rc = sscanf(buf, "region%d\n", &id); if (rc != 1) return -EINVAL; - rc = memregion_alloc(GFP_KERNEL); - if (rc < 0) - return rc; + cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id); + if (IS_ERR(cxlr)) + return PTR_ERR(cxlr); - if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) { - memregion_free(rc); - return -EBUSY; - } + return len; +} +DEVICE_ATTR_RW(create_pmem_region); - cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM, - CXL_DECODER_EXPANDER); +static ssize_t create_ram_region_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + struct cxl_region *cxlr; + int rc, id; + + rc = sscanf(buf, "region%d\n", &id); + if (rc != 1) + return -EINVAL; + + cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); return len; } -DEVICE_ATTR_RW(create_pmem_region); +DEVICE_ATTR_RW(create_ram_region); static ssize_t region_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1891,6 +2280,75 @@ out: return cxlr_pmem; } +static void cxl_dax_region_release(struct device *dev) +{ + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); + + kfree(cxlr_dax); +} + +static const struct attribute_group *cxl_dax_region_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +const struct device_type cxl_dax_region_type = { + .name = "cxl_dax_region", + .release = cxl_dax_region_release, + .groups = cxl_dax_region_attribute_groups, +}; + +static bool is_cxl_dax_region(struct device *dev) +{ + return dev->type == &cxl_dax_region_type; +} + +struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), + "not a cxl_dax_region device\n")) + return NULL; + return container_of(dev, struct cxl_dax_region, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); + +static struct lock_class_key cxl_dax_region_key; + +static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_dax_region *cxlr_dax; + struct device *dev; + + down_read(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) { + cxlr_dax = ERR_PTR(-ENXIO); + goto out; + } + + cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); + if (!cxlr_dax) { + cxlr_dax = ERR_PTR(-ENOMEM); + goto out; + } + + cxlr_dax->hpa_range.start = p->res->start; + cxlr_dax->hpa_range.end = p->res->end; + + dev = &cxlr_dax->dev; + cxlr_dax->cxlr = cxlr; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_dax_region_key); + device_set_pm_not_required(dev); + dev->parent = &cxlr->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_dax_region_type; +out: + 
up_read(&cxl_region_rwsem); + + return cxlr_dax; +} + static void cxlr_pmem_unregister(void *_cxlr_pmem) { struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; @@ -1975,6 +2433,227 @@ err_bridge: return rc; } +static void cxlr_dax_unregister(void *_cxlr_dax) +{ + struct cxl_dax_region *cxlr_dax = _cxlr_dax; + + device_unregister(&cxlr_dax->dev); +} + +static int devm_cxl_add_dax_region(struct cxl_region *cxlr) +{ + struct cxl_dax_region *cxlr_dax; + struct device *dev; + int rc; + + cxlr_dax = cxl_dax_region_alloc(cxlr); + if (IS_ERR(cxlr_dax)) + return PTR_ERR(cxlr_dax); + + dev = &cxlr_dax->dev; + rc = dev_set_name(dev, "dax_region%d", cxlr->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, + cxlr_dax); +err: + put_device(dev); + return rc; +} + +static int match_decoder_by_range(struct device *dev, void *data) +{ + struct range *r1, *r2 = data; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + r1 = &cxlrd->cxlsd.cxld.hpa_range; + return range_contains(r1, r2); +} + +static int match_region_by_range(struct device *dev, void *data) +{ + struct cxl_region_params *p; + struct cxl_region *cxlr; + struct range *r = data; + int rc = 0; + + if (!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + down_read(&cxl_region_rwsem); + if (p->res && p->res->start == r->start && p->res->end == r->end) + rc = 1; + up_read(&cxl_region_rwsem); + + return rc; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_region_params *p; + struct cxl_region *cxlr; + struct resource *res; + int rc; + + do { + cxlr = __create_region(cxlrd, cxled->mode, + atomic_read(&cxlrd->region_id)); + } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); + + if (IS_ERR(cxlr)) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s failed assign region: %ld\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, PTR_ERR(cxlr)); + return cxlr; + } + + down_write(&cxl_region_rwsem); + p = &cxlr->params; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s autodiscovery interrupted\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__); + rc = -EBUSY; + goto err; + } + + set_bit(CXL_REGION_F_AUTO, &cxlr->flags); + + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + rc = -ENOMEM; + goto err; + } + + *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), + dev_name(&cxlr->dev)); + rc = insert_resource(cxlrd->res, res); + if (rc) { + /* + * Platform-firmware may not have split resources like "System + * RAM" on CXL window boundaries see cxl_region_iomem_release() + */ + dev_warn(cxlmd->dev.parent, + "%s:%s: %s %s cannot insert resource\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, dev_name(&cxlr->dev)); + } + + p->res = res; + p->interleave_ways = cxled->cxld.interleave_ways; + p->interleave_granularity = cxled->cxld.interleave_granularity; + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); + if (rc) + goto err; + + 
dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, + dev_name(&cxlr->dev), p->res, p->interleave_ways, + p->interleave_granularity); + + /* ...to match put_device() in cxl_add_to_region() */ + get_device(&cxlr->dev); + up_write(&cxl_region_rwsem); + + return cxlr; + +err: + up_write(&cxl_region_rwsem); + devm_release_action(port->uport, unregister_region, cxlr); + return ERR_PTR(rc); +} + +int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_decoder *cxld = &cxled->cxld; + struct device *cxlrd_dev, *region_dev; + struct cxl_root_decoder *cxlrd; + struct cxl_region_params *p; + struct cxl_region *cxlr; + bool attach = false; + int rc; + + cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range, + match_decoder_by_range); + if (!cxlrd_dev) { + dev_err(cxlmd->dev.parent, + "%s:%s no CXL window for range %#llx:%#llx\n", + dev_name(&cxlmd->dev), dev_name(&cxld->dev), + cxld->hpa_range.start, cxld->hpa_range.end); + return -ENXIO; + } + + cxlrd = to_cxl_root_decoder(cxlrd_dev); + + /* + * Ensure that if multiple threads race to construct_region() for @hpa + * one does the construction and the others add to that. + */ + mutex_lock(&cxlrd->range_lock); + region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa, + match_region_by_range); + if (!region_dev) { + cxlr = construct_region(cxlrd, cxled); + region_dev = &cxlr->dev; + } else + cxlr = to_cxl_region(region_dev); + mutex_unlock(&cxlrd->range_lock); + + rc = PTR_ERR_OR_ZERO(cxlr); + if (rc) + goto out; + + attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); + + down_read(&cxl_region_rwsem); + p = &cxlr->params; + attach = p->state == CXL_CONFIG_COMMIT; + up_read(&cxl_region_rwsem); + + if (attach) { + /* + * If device_attach() fails the range may still be active via + * the platform-firmware memory map, otherwise the driver for + * regions is local to this file, so driver matching can't fail. 
+ */ + if (device_attach(&cxlr->dev) < 0) + dev_err(&cxlr->dev, "failed to enable, range: %pr\n", + p->res); + } + + put_device(region_dev); +out: + put_device(cxlrd_dev); + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); + static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) { if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags)) @@ -1999,6 +2678,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) return 0; } +static int is_system_ram(struct resource *res, void *arg) +{ + struct cxl_region *cxlr = arg; + struct cxl_region_params *p = &cxlr->params; + + dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res); + return 1; +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -2032,6 +2720,17 @@ out: switch (cxlr->mode) { case CXL_DECODER_PMEM: return devm_cxl_add_pmem_region(cxlr); + case CXL_DECODER_RAM: + /* + * The region can not be manged by CXL if any portion of + * it is already online as 'System RAM' + */ + if (walk_iomem_res_desc(IORES_DESC_NONE, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + p->res->start, p->res->end, cxlr, + is_system_ram) > 0) + return 0; + return devm_cxl_add_dax_region(cxlr); default: dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", cxlr->mode); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 20ca2fe2ca8e..c72ef9321cfe 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -6,8 +6,11 @@ #if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) #define _CXL_EVENTS_H -#include <cxl.h> #include <linux/tracepoint.h> +#include <asm-generic/unaligned.h> + +#include <cxl.h> +#include <cxlmem.h> #define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) #define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) @@ -103,6 +106,481 @@ TRACE_EVENT(cxl_aer_correctable_error, ) ); +#define cxl_event_log_type_str(type) \ + __print_symbolic(type, \ + { CXL_EVENT_TYPE_INFO, "Informational" }, \ + { CXL_EVENT_TYPE_WARN, "Warning" }, \ + { CXL_EVENT_TYPE_FAIL, "Failure" }, \ + { CXL_EVENT_TYPE_FATAL, "Fatal" }) + +TRACE_EVENT(cxl_overflow, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_get_event_payload *payload), + + TP_ARGS(dev, log, payload), + + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(int, log) + __field(u64, first_ts) + __field(u64, last_ts) + __field(u16, count) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->log = log; + __entry->count = le16_to_cpu(payload->overflow_err_count); + __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp); + __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp); + ), + + TP_printk("%s: log=%s : %u records from %llu to %llu", + __get_str(dev_name), cxl_event_log_type_str(__entry->log), + __entry->count, __entry->first_ts, __entry->last_ts) + +); + +/* + * Common Event Record Format + * CXL 3.0 section 8.2.9.2.1; Table 8-42 + */ +#define CXL_EVENT_RECORD_FLAG_PERMANENT BIT(2) +#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3) +#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4) +#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5) +#define show_hdr_flags(flags) __print_flags(flags, " | ", \ + { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \ + { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \ + { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \ + { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \ +) + +/* + * Define macros for the common header of each CXL 
event. + * + * Tracepoints using these macros must do 3 things: + * + * 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry + * 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign; + * pass the dev, log, and CXL event header + * 3) Use CXL_EVT_TP_printk() instead of TP_printk() + * + * See the generic_event tracepoint as an example. + */ +#define CXL_EVT_TP_entry \ + __string(dev_name, dev_name(dev)) \ + __field(int, log) \ + __field_struct(uuid_t, hdr_uuid) \ + __field(u32, hdr_flags) \ + __field(u16, hdr_handle) \ + __field(u16, hdr_related_handle) \ + __field(u64, hdr_timestamp) \ + __field(u8, hdr_length) \ + __field(u8, hdr_maint_op_class) + +#define CXL_EVT_TP_fast_assign(dev, l, hdr) \ + __assign_str(dev_name, dev_name(dev)); \ + __entry->log = (l); \ + memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ + __entry->hdr_length = (hdr).length; \ + __entry->hdr_flags = get_unaligned_le24((hdr).flags); \ + __entry->hdr_handle = le16_to_cpu((hdr).handle); \ + __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \ + __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \ + __entry->hdr_maint_op_class = (hdr).maint_op_class + +#define CXL_EVT_TP_printk(fmt, ...) \ + TP_printk("%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \ + "handle=%x related_handle=%x maint_op_class=%u" \ + " : " fmt, \ + __get_str(dev_name), cxl_event_log_type_str(__entry->log), \ + __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ + show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ + __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ + ##__VA_ARGS__) + +TRACE_EVENT(cxl_generic_event, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_event_record_raw *rec), + + TP_ARGS(dev, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + __array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH) + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); + ), + + CXL_EVT_TP_printk("%s", + __print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH)) +); + +/* + * Physical Address field masks + * + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + * + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CXL_DPA_FLAGS_MASK 0x3F +#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK) + +#define CXL_DPA_VOLATILE BIT(0) +#define CXL_DPA_NOT_REPAIRABLE BIT(1) +#define show_dpa_flags(flags) __print_flags(flags, "|", \ + { CXL_DPA_VOLATILE, "VOLATILE" }, \ + { CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \ +) + +/* + * General Media Event Record - GMER + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0) +#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1) +#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2) +#define show_event_desc_flags(flags) __print_flags(flags, "|", \ + { CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \ + { CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \ + { CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \ +) + +#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00 +#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01 +#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02 +#define show_mem_event_type(type) __print_symbolic(type, \ + { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ + { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ +) + +#define CXL_GMER_TRANS_UNKNOWN 0x00 +#define 
+
+/*
+ * General Media Event Record - GMER
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT		BIT(0)
+#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT		BIT(1)
+#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW		BIT(2)
+#define show_event_desc_flags(flags) __print_flags(flags, "|", \
+	{ CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \
+	{ CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \
+	{ CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \
+)
+
+#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR			0x00
+#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR			0x01
+#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x02
+#define show_mem_event_type(type) __print_symbolic(type, \
+	{ CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
+	{ CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
+	{ CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
+)
+
+#define CXL_GMER_TRANS_UNKNOWN				0x00
+#define CXL_GMER_TRANS_HOST_READ			0x01
+#define CXL_GMER_TRANS_HOST_WRITE			0x02
+#define CXL_GMER_TRANS_HOST_SCAN_MEDIA			0x03
+#define CXL_GMER_TRANS_HOST_INJECT_POISON		0x04
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB		0x05
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT	0x06
+#define show_trans_type(type) __print_symbolic(type, \
+	{ CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \
+	{ CXL_GMER_TRANS_HOST_READ, "Host Read" }, \
+	{ CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \
+	{ CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \
+	{ CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \
+	{ CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \
+	{ CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
+)
+
+#define CXL_GMER_VALID_CHANNEL				BIT(0)
+#define CXL_GMER_VALID_RANK				BIT(1)
+#define CXL_GMER_VALID_DEVICE				BIT(2)
+#define CXL_GMER_VALID_COMPONENT			BIT(3)
+#define show_valid_flags(flags) __print_flags(flags, "|", \
+	{ CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
+	{ CXL_GMER_VALID_RANK, "RANK" }, \
+	{ CXL_GMER_VALID_DEVICE, "DEVICE" }, \
+	{ CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
+)
+
+TRACE_EVENT(cxl_general_media,
+
+	TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+		 struct cxl_event_gen_media *rec),
+
+	TP_ARGS(dev, log, rec),
+
+	TP_STRUCT__entry(
+		CXL_EVT_TP_entry
+		/* General Media */
+		__field(u64, dpa)
+		__field(u8, descriptor)
+		__field(u8, type)
+		__field(u8, transaction_type)
+		__field(u8, channel)
+		__field(u32, device)
+		__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
+		__field(u16, validity_flags)
+		/* Following are out of order to pack trace record */
+		__field(u8, rank)
+		__field(u8, dpa_flags)
+	),
+
+	TP_fast_assign(
+		CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+		/* General Media */
+		__entry->dpa = le64_to_cpu(rec->phys_addr);
+		__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
+		/* Mask after flags have been parsed */
+		__entry->dpa &= CXL_DPA_MASK;
+		__entry->descriptor = rec->descriptor;
+		__entry->type = rec->type;
+		__entry->transaction_type = rec->transaction_type;
+		__entry->channel = rec->channel;
+		__entry->rank = rec->rank;
+		__entry->device = get_unaligned_le24(rec->device);
+		memcpy(__entry->comp_id, &rec->component_id,
+		       CXL_EVENT_GEN_MED_COMP_ID_SIZE);
+		__entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
+	),
+
+	CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
+		"descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
+		"device=%x comp_id=%s validity_flags='%s'",
+		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
+		show_event_desc_flags(__entry->descriptor),
+		show_mem_event_type(__entry->type),
+		show_trans_type(__entry->transaction_type),
+		__entry->channel, __entry->rank, __entry->device,
+		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
+		show_valid_flags(__entry->validity_flags)
+	)
+);
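Per the General Media record layout, fields like channel, rank, and device are only meaningful when the matching bit in validity_flags is set, so reporting code should gate on those bits. A hedged userspace sketch of that pattern (struct and names invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define VALID_CHANNEL (1u << 0)
#define VALID_RANK    (1u << 1)
#define VALID_DEVICE  (1u << 2)

struct gmer_fields {
	uint16_t validity_flags;
	uint8_t channel;
	uint8_t rank;
	uint32_t device;
};

static void report_gmer(const struct gmer_fields *f)
{
	if (f->validity_flags & VALID_CHANNEL)
		printf("channel=%u\n", f->channel);
	if (f->validity_flags & VALID_RANK)
		printf("rank=%u\n", f->rank);
	if (f->validity_flags & VALID_DEVICE)
		printf("device=%#x\n", (unsigned int)f->device);
}

int main(void)
{
	struct gmer_fields f = {
		.validity_flags = VALID_CHANNEL | VALID_RANK,
		.channel = 2, .rank = 1, .device = 0,
	};

	report_gmer(&f); /* device is skipped: its valid bit is clear */
	return 0;
}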
+
+/*
+ * DRAM Event Record - DER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+/*
+ * DRAM Event Record defines many fields the same as the General Media Event
+ * Record. Reuse those definitions as appropriate.
+ */
+#define CXL_DER_VALID_CHANNEL			BIT(0)
+#define CXL_DER_VALID_RANK			BIT(1)
+#define CXL_DER_VALID_NIBBLE			BIT(2)
+#define CXL_DER_VALID_BANK_GROUP		BIT(3)
+#define CXL_DER_VALID_BANK			BIT(4)
+#define CXL_DER_VALID_ROW			BIT(5)
+#define CXL_DER_VALID_COLUMN			BIT(6)
+#define CXL_DER_VALID_CORRECTION_MASK		BIT(7)
+#define show_dram_valid_flags(flags) __print_flags(flags, "|", \
+	{ CXL_DER_VALID_CHANNEL, "CHANNEL" }, \
+	{ CXL_DER_VALID_RANK, "RANK" }, \
+	{ CXL_DER_VALID_NIBBLE, "NIBBLE" }, \
+	{ CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \
+	{ CXL_DER_VALID_BANK, "BANK" }, \
+	{ CXL_DER_VALID_ROW, "ROW" }, \
+	{ CXL_DER_VALID_COLUMN, "COLUMN" }, \
+	{ CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \
+)
+
+TRACE_EVENT(cxl_dram,
+
+	TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+		 struct cxl_event_dram *rec),
+
+	TP_ARGS(dev, log, rec),
+
+	TP_STRUCT__entry(
+		CXL_EVT_TP_entry
+		/* DRAM */
+		__field(u64, dpa)
+		__field(u8, descriptor)
+		__field(u8, type)
+		__field(u8, transaction_type)
+		__field(u8, channel)
+		__field(u16, validity_flags)
+		__field(u16, column)	/* Out of order to pack trace record */
+		__field(u32, nibble_mask)
+		__field(u32, row)
+		__array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
+		__field(u8, rank)	/* Out of order to pack trace record */
+		__field(u8, bank_group)	/* Out of order to pack trace record */
+		__field(u8, bank)	/* Out of order to pack trace record */
+		__field(u8, dpa_flags)	/* Out of order to pack trace record */
+	),
+
+	TP_fast_assign(
+		CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+		/* DRAM */
+		__entry->dpa = le64_to_cpu(rec->phys_addr);
+		__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
+		__entry->dpa &= CXL_DPA_MASK;
+		__entry->descriptor = rec->descriptor;
+		__entry->type = rec->type;
+		__entry->transaction_type = rec->transaction_type;
+		__entry->validity_flags = get_unaligned_le16(rec->validity_flags);
+		__entry->channel = rec->channel;
+		__entry->rank = rec->rank;
+		__entry->nibble_mask = get_unaligned_le24(rec->nibble_mask);
+		__entry->bank_group = rec->bank_group;
+		__entry->bank = rec->bank;
+		__entry->row = get_unaligned_le24(rec->row);
+		__entry->column = get_unaligned_le16(rec->column);
+		memcpy(__entry->cor_mask, &rec->correction_mask,
+		       CXL_EVENT_DER_CORRECTION_MASK_SIZE);
+	),
+
+	CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \
+		"transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
+		"bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
+		"validity_flags='%s'",
+		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
+		show_event_desc_flags(__entry->descriptor),
+		show_mem_event_type(__entry->type),
+		show_trans_type(__entry->transaction_type),
+		__entry->channel, __entry->rank, __entry->nibble_mask,
+		__entry->bank_group, __entry->bank,
+		__entry->row, __entry->column,
+		__print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
+		show_dram_valid_flags(__entry->validity_flags)
+	)
+);
+
+/*
+ * Memory Module Event Record - MMER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CXL_MMER_HEALTH_STATUS_CHANGE	0x00
+#define CXL_MMER_MEDIA_STATUS_CHANGE	0x01
+#define CXL_MMER_LIFE_USED_CHANGE	0x02
+#define CXL_MMER_TEMP_CHANGE		0x03
+#define CXL_MMER_DATA_PATH_ERROR	0x04
+#define CXL_MMER_LSA_ERROR		0x05
+#define show_dev_evt_type(type) __print_symbolic(type, \
+	{ CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
+	{ CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
+	{ CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
+	{ CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
+	{ CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
+	{ CXL_MMER_LSA_ERROR, "LSA Error" } \
+)
+
+/*
+ * Device Health Information - DHI
+ *
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+#define CXL_DHI_HS_MAINTENANCE_NEEDED		BIT(0)
+#define CXL_DHI_HS_PERFORMANCE_DEGRADED		BIT(1)
+#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED	BIT(2)
+#define show_health_status_flags(flags) __print_flags(flags, "|", \
+	{ CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
+	{ CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
+	{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
+)
+
+#define CXL_DHI_MS_NORMAL					0x00
+#define CXL_DHI_MS_NOT_READY					0x01
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST			0x02
+#define CXL_DHI_MS_ALL_DATA_LOST				0x03
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS	0x04
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN	0x05
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT		0x06
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS		0x07
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN		0x08
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT			0x09
+#define show_media_status(ms) __print_symbolic(ms, \
+	{ CXL_DHI_MS_NORMAL, "Normal" }, \
+	{ CXL_DHI_MS_NOT_READY, "Not Ready" }, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOST, "Write Persistency Lost" }, \
+	{ CXL_DHI_MS_ALL_DATA_LOST, "All Data Lost" }, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \
+		"Write Persistency Loss in the Event of Power Loss" }, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \
+		"Write Persistency Loss in Event of Shutdown" }, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \
+		"Write Persistency Loss Imminent" }, \
+	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \
+		"All Data Loss in Event of Power Loss" }, \
+	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \
+		"All Data Loss in the Event of Shutdown" }, \
+	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \
+		"All Data Loss Imminent" } \
+)
+
+#define CXL_DHI_AS_NORMAL	0x0
+#define CXL_DHI_AS_WARNING	0x1
+#define CXL_DHI_AS_CRITICAL	0x2
+#define show_two_bit_status(as) __print_symbolic(as, \
+	{ CXL_DHI_AS_NORMAL, "Normal" }, \
+	{ CXL_DHI_AS_WARNING, "Warning" }, \
+	{ CXL_DHI_AS_CRITICAL, "Critical" } \
+)
+#define show_one_bit_status(as) __print_symbolic(as, \
+	{ CXL_DHI_AS_NORMAL, "Normal" }, \
+	{ CXL_DHI_AS_WARNING, "Warning" } \
+)
+
+#define CXL_DHI_AS_LIFE_USED(as)	(as & 0x3)
+#define CXL_DHI_AS_DEV_TEMP(as)		((as & 0xC) >> 2)
+#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)	((as & 0x10) >> 4)
+#define CXL_DHI_AS_COR_PER_ERR_CNT(as)	((as & 0x20) >> 5)
+
+TRACE_EVENT(cxl_memory_module,
+
+	TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+		 struct cxl_event_mem_module *rec),
+
+	TP_ARGS(dev, log, rec),
+
+	TP_STRUCT__entry(
+		CXL_EVT_TP_entry
+
+		/* Memory Module Event */
+		__field(u8, event_type)
+
+		/* Device Health Info */
+		__field(u8, health_status)
+		__field(u8, media_status)
+		__field(u8, life_used)
+		__field(u32, dirty_shutdown_cnt)
+		__field(u32, cor_vol_err_cnt)
+		__field(u32, cor_per_err_cnt)
+		__field(s16, device_temp)
+		__field(u8, add_status)
+	),
+
+	TP_fast_assign(
+		CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+		/* Memory Module Event */
+		__entry->event_type = rec->event_type;
+
+		/* Device Health Info */
+		__entry->health_status = rec->info.health_status;
+		__entry->media_status = rec->info.media_status;
+		__entry->life_used = rec->info.life_used;
+		__entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
+		__entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
+		__entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
+		__entry->device_temp = get_unaligned_le16(rec->info.device_temp);
+		__entry->add_status = rec->info.add_status;
+	),
+
+	CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
+		"as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
+		"as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
+		"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
+		show_dev_evt_type(__entry->event_type),
+		show_health_status_flags(__entry->health_status),
+		show_media_status(__entry->media_status),
+		show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
+		show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
+		show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
+		show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
+		__entry->life_used, __entry->device_temp,
+		__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
+		__entry->cor_per_err_cnt
+	)
+);
+
 #endif /* _CXL_EVENTS_H */
 
 #define TRACE_INCLUDE_FILE trace
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 197ecffce4d0..64258442622a 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -157,6 +157,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
 #define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
 #define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
 
+/* CXL 3.0 8.2.8.3.1 Event Status Register */
+#define CXLDEV_DEV_EVENT_STATUS_OFFSET	0x00
+#define CXLDEV_EVENT_STATUS_INFO	BIT(0)
+#define CXLDEV_EVENT_STATUS_WARN	BIT(1)
+#define CXLDEV_EVENT_STATUS_FAIL	BIT(2)
+#define CXLDEV_EVENT_STATUS_FATAL	BIT(3)
+
+#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO | \
+				 CXLDEV_EVENT_STATUS_WARN | \
+				 CXLDEV_EVENT_STATUS_FAIL | \
+				 CXLDEV_EVENT_STATUS_FATAL)
+
+/* CXL rev 3.0 section 8.2.9.2.4; Table 8-52 */
+#define CXLDEV_EVENT_INT_MODE_MASK	GENMASK(1, 0)
+#define CXLDEV_EVENT_INT_MSGNUM_MASK	GENMASK(7, 4)
+
 /* CXL 2.0 8.2.8.4 Mailbox Registers */
 #define CXLDEV_MBOX_CAPS_OFFSET 0x00
 #define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
@@ -262,6 +278,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
  * cxl_decoder flags that define the type of memory / devices this
  * decoder supports as well as configuration lock status See "CXL 2.0
  * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
+ * Additionally indicate whether decoder settings were autodetected
+ * or user customized.
  */
 #define CXL_DECODER_F_RAM   BIT(0)
 #define CXL_DECODER_F_PMEM  BIT(1)
@@ -321,12 +339,36 @@ enum cxl_decoder_mode {
 	CXL_DECODER_DEAD,
 };
 
+static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
+{
+	static const char * const names[] = {
+		[CXL_DECODER_NONE] = "none",
+		[CXL_DECODER_RAM] = "ram",
+		[CXL_DECODER_PMEM] = "pmem",
+		[CXL_DECODER_MIXED] = "mixed",
+	};
+
+	if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
+		return names[mode];
+	return "mixed";
+}
+
+/*
+ * Track whether this decoder is reserved for region autodiscovery, or
+ * free for userspace provisioning.
+ */
+enum cxl_decoder_state {
+	CXL_DECODER_STATE_MANUAL,
+	CXL_DECODER_STATE_AUTO,
+};
+
 /**
  * struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder
  * @cxld: base cxl_decoder_object
  * @dpa_res: actively claimed DPA span of this decoder
  * @skip: offset into @dpa_res where @cxld.hpa_range maps
  * @mode: which memory type / access-mode-partition this decoder targets
+ * @state: autodiscovery state
  * @pos: interleave position in @cxld.region
  */
 struct cxl_endpoint_decoder {
@@ -334,6 +376,7 @@ struct cxl_endpoint_decoder {
 	struct resource *dpa_res;
 	resource_size_t skip;
 	enum cxl_decoder_mode mode;
+	enum cxl_decoder_state state;
 	int pos;
 };
 
@@ -367,6 +410,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
  * @region_id: region id for next region provisioning event
  * @calc_hb: which host bridge covers the n'th position by granularity
  * @platform_data: platform specific configuration data
+ * @range_lock: sync region autodiscovery by address range
  * @cxlsd: base cxl switch decoder
 */
 struct cxl_root_decoder {
@@ -374,6 +418,7 @@ struct cxl_root_decoder {
 	atomic_t region_id;
 	cxl_calc_hb_fn calc_hb;
 	void *platform_data;
+	struct mutex range_lock;
 	struct cxl_switch_decoder cxlsd;
 };
 
@@ -423,6 +468,13 @@ struct cxl_region_params {
  */
 #define CXL_REGION_F_INCOHERENT 0
 
+/*
+ * Indicate whether this region has been assembled by autodetection or
+ * userspace assembly. Prevent endpoint decoders outside of automatic
+ * detection from being added to the region.
+ */
+#define CXL_REGION_F_AUTO 1
+
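Note that CXL_REGION_F_INCOHERENT and CXL_REGION_F_AUTO are bit numbers consumed by set_bit()/test_bit() on cxlr->flags, not BIT() masks like the decoder flags earlier in this header. A small userspace sketch of that distinction, with stand-in helpers rather than the kernel's bitops:

#include <stdio.h>

/* Bit numbers, consumed as (1UL << n) by the bitop helpers */
#define REGION_F_INCOHERENT 0
#define REGION_F_AUTO       1

static void set_bit_nr(int nr, unsigned long *flags)
{
	*flags |= 1UL << nr;
}

static int test_bit_nr(int nr, const unsigned long *flags)
{
	return !!(*flags & (1UL << nr));
}

int main(void)
{
	unsigned long flags = 0;

	set_bit_nr(REGION_F_AUTO, &flags);
	printf("auto=%d incoherent=%d\n",
	       test_bit_nr(REGION_F_AUTO, &flags),
	       test_bit_nr(REGION_F_INCOHERENT, &flags));
	return 0;
}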
 /**
  * struct cxl_region - CXL region
  * @dev: This region's device
@@ -478,6 +530,12 @@ struct cxl_pmem_region {
 	struct cxl_pmem_region_mapping mapping[];
 };
 
+struct cxl_dax_region {
+	struct device dev;
+	struct cxl_region *cxlr;
+	struct range hpa_range;
+};
+
 /**
  * struct cxl_port - logical collection of upstream port devices and
  *		     downstream port devices to construct a CXL memory
@@ -618,8 +676,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
+struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
 struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
+bool is_switch_decoder(struct device *dev);
 bool is_endpoint_decoder(struct device *dev);
 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 						unsigned int nr_targets,
@@ -670,6 +730,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 #define CXL_DEVICE_MEMORY_EXPANDER	5
 #define CXL_DEVICE_REGION		6
 #define CXL_DEVICE_PMEM_REGION		7
+#define CXL_DEVICE_DAX_REGION		8
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
@@ -686,6 +747,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
 #ifdef CONFIG_CXL_REGION
 bool is_cxl_pmem_region(struct device *dev);
 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
+int cxl_add_to_region(struct cxl_port *root,
+		      struct cxl_endpoint_decoder *cxled);
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
 #else
 static inline bool is_cxl_pmem_region(struct device *dev)
 {
@@ -695,6 +759,15 @@ static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
 {
 	return NULL;
 }
+static inline int cxl_add_to_region(struct cxl_port *root,
+				    struct cxl_endpoint_decoder *cxled)
+{
+	return 0;
+}
+static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+	return NULL;
+}
 #endif
 
 /*
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2d85776236dd..64ede1a06eaf 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -4,6 +4,7 @@
 #define __CXL_MEM_H__
 #include <uapi/linux/cxl_mem.h>
 #include <linux/cdev.h>
+#include <linux/uuid.h>
 #include "cxl.h"
 
 /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -38,6 +39,7 @@
  * @cxl_nvb: coordinate removal of @cxl_nvd if present
  * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
  * @id: id number of this memdev instance.
+ * @depth: endpoint port depth
  */
 struct cxl_memdev {
 	struct device dev;
@@ -47,6 +49,7 @@ struct cxl_memdev {
 	struct cxl_nvdimm_bridge *cxl_nvb;
 	struct cxl_nvdimm *cxl_nvd;
 	int id;
+	int depth;
 };
 
 static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -79,6 +82,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 }
 
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+			 resource_size_t base, resource_size_t len,
+			 resource_size_t skipped);
 
 static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
 					 struct cxl_memdev *cxlmd)
@@ -194,6 +200,34 @@ struct cxl_endpoint_dvsec_info {
 };
 
 /**
+ * Event Interrupt Policy
+ *
+ * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
+ */
+enum cxl_event_int_mode {
+	CXL_INT_NONE		= 0x00,
+	CXL_INT_MSI_MSIX	= 0x01,
+	CXL_INT_FW		= 0x02
+};
+struct cxl_event_interrupt_policy {
+	u8 info_settings;
+	u8 warn_settings;
+	u8 failure_settings;
+	u8 fatal_settings;
+} __packed;
+
+/**
+ * struct cxl_event_state - Event log driver state
+ *
+ * @buf: Buffer to receive event data
+ * @log_lock: Serialize @buf and log use
+ */
+struct cxl_event_state {
+	struct cxl_get_event_payload *buf;
+	struct mutex log_lock;
+};
+
+/**
  * struct cxl_dev_state - The driver device state
  *
  * cxl_dev_state represents the CXL driver/device state.  It provides an
@@ -266,14 +300,21 @@ struct cxl_dev_state {
 
 	struct xarray doe_mbs;
 
+	struct cxl_event_state event;
+
 	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
 };
 
 enum cxl_opcode {
 	CXL_MBOX_OP_INVALID		= 0x0000,
 	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
+	CXL_MBOX_OP_GET_EVENT_RECORD	= 0x0100,
+	CXL_MBOX_OP_CLEAR_EVENT_RECORD	= 0x0101,
+	CXL_MBOX_OP_GET_EVT_INT_POLICY	= 0x0102,
+	CXL_MBOX_OP_SET_EVT_INT_POLICY	= 0x0103,
 	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
 	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
+	CXL_MBOX_OP_SET_TIMESTAMP	= 0x0301,
 	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
 	CXL_MBOX_OP_GET_LOG		= 0x0401,
 	CXL_MBOX_OP_IDENTIFY		= 0x4000,
@@ -347,6 +388,136 @@ struct cxl_mbox_identify {
 	u8 qos_telemetry_caps;
 } __packed;
 
+/*
+ * Common Event Record Format
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+struct cxl_event_record_hdr {
+	uuid_t id;
+	u8 length;
+	u8 flags[3];
+	__le16 handle;
+	__le16 related_handle;
+	__le64 timestamp;
+	u8 maint_op_class;
+	u8 reserved[15];
+} __packed;
+
+#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
+struct cxl_event_record_raw {
+	struct cxl_event_record_hdr hdr;
+	u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
+} __packed;
+
+/*
+ * Get Event Records output payload
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-50
+ */
+#define CXL_GET_EVENT_FLAG_OVERFLOW	BIT(0)
+#define CXL_GET_EVENT_FLAG_MORE_RECORDS	BIT(1)
+struct cxl_get_event_payload {
+	u8 flags;
+	u8 reserved1;
+	__le16 overflow_err_count;
+	__le64 first_overflow_timestamp;
+	__le64 last_overflow_timestamp;
+	__le16 record_count;
+	u8 reserved2[10];
+	struct cxl_event_record_raw records[];
+} __packed;
+
+/*
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
+ */
+enum cxl_event_log_type {
+	CXL_EVENT_TYPE_INFO = 0x00,
+	CXL_EVENT_TYPE_WARN,
+	CXL_EVENT_TYPE_FAIL,
+	CXL_EVENT_TYPE_FATAL,
+	CXL_EVENT_TYPE_MAX
+};
+
+/*
+ * Clear Event Records input payload
+ * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
+ */
+struct cxl_mbox_clear_event_payload {
+	u8 event_log;		/* enum cxl_event_log_type */
+	u8 clear_flags;
+	u8 nr_recs;
+	u8 reserved[3];
+	__le16 handles[];
+} __packed;
+#define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX
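The Clear Event Records payload ends in a flexible array of handles, so its allocation size depends on how many handles are cleared per command (bounded by CXL_CLEAR_EVENT_MAX_HANDLES). A sketch of the sizing arithmetic, equivalent in spirit to the kernel's struct_size() helper (struct name shortened for illustration):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct clear_event_payload {
	uint8_t event_log;
	uint8_t clear_flags;
	uint8_t nr_recs;
	uint8_t reserved[3];
	uint16_t handles[];	/* flexible array of record handles */
};

int main(void)
{
	size_t nr_handles = 4;
	/* fixed header plus one 16-bit handle per record to clear */
	size_t payload_size = sizeof(struct clear_event_payload) +
			      nr_handles * sizeof(uint16_t);

	printf("payload bytes for %zu handles: %zu\n", nr_handles, payload_size);
	return 0;
}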
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE	0x10
+struct cxl_event_gen_media {
+	struct cxl_event_record_hdr hdr;
+	__le64 phys_addr;
+	u8 descriptor;
+	u8 type;
+	u8 transaction_type;
+	u8 validity_flags[2];
+	u8 channel;
+	u8 rank;
+	u8 device[3];
+	u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+	u8 reserved[46];
+} __packed;
+
+/*
+ * DRAM Event Record - DER
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CXL_EVENT_DER_CORRECTION_MASK_SIZE	0x20
+struct cxl_event_dram {
+	struct cxl_event_record_hdr hdr;
+	__le64 phys_addr;
+	u8 descriptor;
+	u8 type;
+	u8 transaction_type;
+	u8 validity_flags[2];
+	u8 channel;
+	u8 rank;
+	u8 nibble_mask[3];
+	u8 bank_group;
+	u8 bank;
+	u8 row[3];
+	u8 column[2];
+	u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
+	u8 reserved[0x17];
+} __packed;
+
+/*
+ * Get Health Info Record
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+struct cxl_get_health_info {
+	u8 health_status;
+	u8 media_status;
+	u8 add_status;
+	u8 life_used;
+	u8 device_temp[2];
+	u8 dirty_shutdown_cnt[4];
+	u8 cor_vol_err_cnt[4];
+	u8 cor_per_err_cnt[4];
+} __packed;
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+struct cxl_event_mem_module {
+	struct cxl_event_record_hdr hdr;
+	u8 event_type;
+	struct cxl_get_health_info info;
+	u8 reserved[0x3d];
+} __packed;
+
 struct cxl_mbox_get_partition_info {
 	__le64 active_volatile_cap;
 	__le64 active_persistent_cap;
@@ -372,6 +543,12 @@ struct cxl_mbox_set_partition_info {
 
 #define  CXL_SET_PARTITION_IMMEDIATE_FLAG	BIT(0)
 
+/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
+struct cxl_mbox_set_timestamp_in {
+	__le64 timestamp;
+
+} __packed;
+
 /**
  * struct cxl_mem_command - Driver representation of a memory device command
  * @info: Command information as it exists for the UAPI
@@ -440,6 +617,9 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
 void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
+void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
+int cxl_set_timestamp(struct cxl_dev_state *cxlds);
+
 #ifdef CONFIG_CXL_SUSPEND
 void cxl_mem_active_inc(void);
 void cxl_mem_active_dec(void);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 77dbdb980b12..a8ea04f536ab 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -53,6 +53,12 @@
 #define     CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK	GENMASK(15, 8)
 #define     CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK	GENMASK(31, 16)
 
+/*
+ * NOTE: Currently all the functions which are enabled for CXL require their
+ * vectors to be in the first 16.  Use this as the default max.
+ */
+#define CXL_PCI_DEFAULT_MAX_VECTORS 16
+
 /* Register Block Identifier (RBI) */
 enum cxl_regloc_type {
 	CXL_REGLOC_RBI_EMPTY = 0,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a509640994d7..60b23624d167 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -473,8 +473,234 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev)
 	return 0;
 }
 
+static void free_event_buf(void *buf)
+{
+	kvfree(buf);
+}
+
+/*
+ * There is a single buffer for reading event logs from the mailbox.  All logs
+ * share this buffer, protected by the cxlds->event.log_lock.
+ */
+static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
+{
+	struct cxl_get_event_payload *buf;
+
+	buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	cxlds->event.buf = buf;
+
+	return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
+}
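Among the additions above is the Set Timestamp input payload, a single little-endian 64-bit value; the driver appears to send nanoseconds from the realtime clock. A hedged userspace sketch of filling such a payload (the packing helper is invented, not a kernel API):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Store a host u64 as little-endian bytes, like cpu_to_le64() */
static void put_le64(uint8_t out[8], uint64_t v)
{
	for (int i = 0; i < 8; i++)
		out[i] = (uint8_t)(v >> (8 * i));
}

int main(void)
{
	struct timespec ts;
	uint8_t payload[8];	/* stands in for struct cxl_mbox_set_timestamp_in */

	clock_gettime(CLOCK_REALTIME, &ts);
	put_le64(payload, (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec);

	printf("first payload byte: %#x\n", (unsigned int)payload[0]);
	return 0;
}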
+
+static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
+{
+	int nvecs;
+
+	/*
+	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
+	 * not generate INTx messages if that function participates in
+	 * CXL.cache or CXL.mem.
+	 *
+	 * Additionally pci_alloc_irq_vectors() handles calling
+	 * pci_free_irq_vectors() automatically despite not being a pcim_*
+	 * helper.  See pci_setup_msi_context().
+	 */
+	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
+				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
+	if (nvecs < 1) {
+		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
+		return -ENXIO;
+	}
+	return 0;
+}
+
+struct cxl_dev_id {
+	struct cxl_dev_state *cxlds;
+};
+
+static irqreturn_t cxl_event_thread(int irq, void *id)
+{
+	struct cxl_dev_id *dev_id = id;
+	struct cxl_dev_state *cxlds = dev_id->cxlds;
+	u32 status;
+
+	do {
+		/*
+		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
+		 * ignore the reserved upper 32 bits
+		 */
+		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
+		/* Ignore logs unknown to the driver */
+		status &= CXLDEV_EVENT_STATUS_ALL;
+		if (!status)
+			break;
+		cxl_mem_get_event_records(cxlds, status);
+		cond_resched();
+	} while (status);
+
+	return IRQ_HANDLED;
+}
+
+static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
+{
+	struct device *dev = cxlds->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct cxl_dev_id *dev_id;
+	int irq;
+
+	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
+		return -ENXIO;
+
+	/* dev_id must be globally unique and must contain the cxlds */
+	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
+	if (!dev_id)
+		return -ENOMEM;
+	dev_id->cxlds = cxlds;
+
+	irq = pci_irq_vector(pdev,
+			     FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
+	if (irq < 0)
+		return irq;
+
+	return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
+					 IRQF_SHARED | IRQF_ONESHOT, NULL,
+					 dev_id);
+}
+
+static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
+				    struct cxl_event_interrupt_policy *policy)
+{
+	struct cxl_mbox_cmd mbox_cmd = {
+		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
+		.payload_out = policy,
+		.size_out = sizeof(*policy),
+	};
+	int rc;
+
+	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	if (rc < 0)
+		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
+			rc);
+
+	return rc;
+}
+
+static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
+				    struct cxl_event_interrupt_policy *policy)
+{
+	struct cxl_mbox_cmd mbox_cmd;
+	int rc;
+
+	*policy = (struct cxl_event_interrupt_policy) {
+		.info_settings = CXL_INT_MSI_MSIX,
+		.warn_settings = CXL_INT_MSI_MSIX,
+		.failure_settings = CXL_INT_MSI_MSIX,
+		.fatal_settings = CXL_INT_MSI_MSIX,
+	};
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
+		.payload_in = policy,
+		.size_in = sizeof(*policy),
+	};
+
+	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	if (rc < 0) {
+		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
+			rc);
+		return rc;
+	}
+
+	/* Retrieve final interrupt settings */
+	return cxl_event_get_int_policy(cxlds, policy);
+}
+
+static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+{
+	struct cxl_event_interrupt_policy policy;
+	int rc;
+
+	rc = cxl_event_config_msgnums(cxlds, &policy);
+	if (rc)
+		return rc;
+
+	rc = cxl_event_req_irq(cxlds, policy.info_settings);
+	if (rc) {
+		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
+		return rc;
+	}
+
+	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
+	if (rc) {
+		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
+		return rc;
+	}
+
+	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
+	if (rc) {
+		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
+		return rc;
+	}
+
+	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
+	if (rc) {
+		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
+		return rc;
+	}
+
+	return 0;
+}
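Each per-log setting byte in the interrupt policy encodes the mode in bits [1:0] and the MSI/MSI-X message number in bits [7:4]; cxl_event_req_irq() above extracts both with FIELD_GET(). The same masks in a plain-C sketch:

#include <stdint.h>
#include <stdio.h>

#define INT_MODE_MASK   0x03	/* bits [1:0], GENMASK(1, 0) */
#define INT_MSGNUM_MASK 0xf0	/* bits [7:4], GENMASK(7, 4) */

#define INT_MSI_MSIX 0x01

int main(void)
{
	uint8_t setting = 0x21;	/* message number 2, MSI/MSI-X mode */
	uint8_t mode = setting & INT_MODE_MASK;
	uint8_t msgnum = (setting & INT_MSGNUM_MASK) >> 4;

	if (mode == INT_MSI_MSIX)
		printf("request vector for message number %u\n", msgnum);
	return 0;
}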
+
+static bool cxl_event_int_is_fw(u8 setting)
+{
+	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
+
+	return mode == CXL_INT_FW;
+}
+
+static int cxl_event_config(struct pci_host_bridge *host_bridge,
+			    struct cxl_dev_state *cxlds)
+{
+	struct cxl_event_interrupt_policy policy;
+	int rc;
+
+	/*
+	 * When BIOS maintains CXL error reporting control, it will process
+	 * event records.  Only one agent can do so.
+	 */
+	if (!host_bridge->native_cxl_error)
+		return 0;
+
+	rc = cxl_mem_alloc_event_buf(cxlds);
+	if (rc)
+		return rc;
+
+	rc = cxl_event_get_int_policy(cxlds, &policy);
+	if (rc)
+		return rc;
+
+	if (cxl_event_int_is_fw(policy.info_settings) ||
+	    cxl_event_int_is_fw(policy.warn_settings) ||
+	    cxl_event_int_is_fw(policy.failure_settings) ||
+	    cxl_event_int_is_fw(policy.fatal_settings)) {
+		dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
+		return -EBUSY;
+	}
+
+	rc = cxl_event_irqsetup(cxlds);
+	if (rc)
+		return rc;
+
+	cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
+
+	return 0;
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
 	struct cxl_register_map map;
 	struct cxl_memdev *cxlmd;
 	struct cxl_dev_state *cxlds;
@@ -490,6 +716,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	rc = pcim_enable_device(pdev);
 	if (rc)
 		return rc;
+	pci_set_master(pdev);
 
 	cxlds = cxl_dev_state_create(&pdev->dev);
 	if (IS_ERR(cxlds))
@@ -538,6 +765,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
+	rc = cxl_set_timestamp(cxlds);
+	if (rc)
+		return rc;
+
 	rc = cxl_dev_state_identify(cxlds);
 	if (rc)
 		return rc;
@@ -546,10 +777,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
+	rc = cxl_alloc_irq_vectors(pdev);
+	if (rc)
+		return rc;
+
 	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 
+	rc = cxl_event_config(host_bridge, cxlds);
+	if (rc)
+		return rc;
+
 	rc = cxl_pci_ras_unmask(pdev);
 	if (rc)
 		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index eedefebc4283..71cfa1fdf902 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev)
 		return rc;
 
 	set_bit(NDD_LABELING, &flags);
+	set_bit(NDD_REGISTER_SYNC, &flags);
 	set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
 	set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
 	set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
@@ -225,11 +226,35 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
 	return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
 }
 
+static int detach_nvdimm(struct device *dev, void *data)
+{
+	struct cxl_nvdimm *cxl_nvd;
+	bool release = false;
+
+	if (!is_cxl_nvdimm(dev))
+		return 0;
+
+	device_lock(dev);
+	if (!dev->driver)
+		goto out;
+
+	cxl_nvd = to_cxl_nvdimm(dev);
+	if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
+		release = true;
+out:
+	device_unlock(dev);
+	if (release)
+		device_release_driver(dev);
+	return 0;
+}
+
 static void unregister_nvdimm_bus(void *_cxl_nvb)
 {
 	struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
 	struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
 
+	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, detach_nvdimm);
+
 	cxl_nvb->nvdimm_bus = NULL;
 	nvdimm_bus_unregister(nvdimm_bus);
 }
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 5453771bf330..d6c151dabaa7 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -30,57 +30,111 @@ static void schedule_detach(void *cxlmd)
 	schedule_cxl_memdev_detach(cxlmd);
 }
 
-static int cxl_port_probe(struct device *dev)
+static int discover_region(struct device *dev, void *root)
+{
+	struct cxl_endpoint_decoder *cxled;
+	int rc;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
+		return 0;
+
+	if (cxled->state != CXL_DECODER_STATE_AUTO)
+		return 0;
+
+	/*
+	 * Region enumeration is opportunistic, if this add-event fails,
+	 * continue to the next endpoint decoder.
+	 */
+	rc = cxl_add_to_region(root, cxled);
+	if (rc)
+		dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
+			cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
+
+	return 0;
+}
+
+static int cxl_switch_port_probe(struct cxl_port *port)
 {
-	struct cxl_port *port = to_cxl_port(dev);
 	struct cxl_hdm *cxlhdm;
 	int rc;
 
+	rc = devm_cxl_port_enumerate_dports(port);
+	if (rc < 0)
+		return rc;
 
-	if (!is_cxl_endpoint(port)) {
-		rc = devm_cxl_port_enumerate_dports(port);
-		if (rc < 0)
-			return rc;
-		if (rc == 1)
-			return devm_cxl_add_passthrough_decoder(port);
-	}
+	if (rc == 1)
+		return devm_cxl_add_passthrough_decoder(port);
 
 	cxlhdm = devm_cxl_setup_hdm(port);
 	if (IS_ERR(cxlhdm))
 		return PTR_ERR(cxlhdm);
 
-	if (is_cxl_endpoint(port)) {
-		struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
-		struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	return devm_cxl_enumerate_decoders(cxlhdm);
+}
+
+static int cxl_endpoint_port_probe(struct cxl_port *port)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_hdm *cxlhdm;
+	struct cxl_port *root;
+	int rc;
+
+	cxlhdm = devm_cxl_setup_hdm(port);
+	if (IS_ERR(cxlhdm))
+		return PTR_ERR(cxlhdm);
 
-		/* Cache the data early to ensure is_visible() works */
-		read_cdat_data(port);
+	/* Cache the data early to ensure is_visible() works */
+	read_cdat_data(port);
 
-		get_device(&cxlmd->dev);
-		rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
-		if (rc)
-			return rc;
+	get_device(&cxlmd->dev);
+	rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd);
+	if (rc)
+		return rc;
 
-		rc = cxl_hdm_decode_init(cxlds, cxlhdm);
-		if (rc)
-			return rc;
+	rc = cxl_hdm_decode_init(cxlds, cxlhdm);
+	if (rc)
+		return rc;
 
-		rc = cxl_await_media_ready(cxlds);
-		if (rc) {
-			dev_err(dev, "Media not active (%d)\n", rc);
-			return rc;
-		}
+	rc = cxl_await_media_ready(cxlds);
+	if (rc) {
+		dev_err(&port->dev, "Media not active (%d)\n", rc);
+		return rc;
 	}
 
 	rc = devm_cxl_enumerate_decoders(cxlhdm);
-	if (rc) {
-		dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
+	if (rc)
 		return rc;
-	}
+
+	/*
+	 * This can't fail in practice as CXL root exit unregisters all
+	 * descendant ports and that in turn synchronizes with cxl_port_probe()
+	 */
+	root = find_cxl_root(&cxlmd->dev);
+
+	/*
+	 * Now that all endpoint decoders are successfully enumerated, try to
+	 * assemble regions from committed decoders
+	 */
+	device_for_each_child(&port->dev, root, discover_region);
+	put_device(&root->dev);
 
 	return 0;
 }
 
+static int cxl_port_probe(struct device *dev)
+{
+	struct cxl_port *port = to_cxl_port(dev);
+
+	if (is_cxl_endpoint(port))
+		return cxl_endpoint_port_probe(port);
+	return cxl_switch_port_probe(port);
+}
+
 static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
 			 struct bin_attribute *bin_attr, char *buf,
 			 loff_t offset, size_t count)