summaryrefslogtreecommitdiff
path: root/drivers/cxl
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cxl')
-rw-r--r--drivers/cxl/Kconfig12
-rw-r--r--drivers/cxl/acpi.c4
-rw-r--r--drivers/cxl/core/core.h7
-rw-r--r--drivers/cxl/core/hdm.c25
-rw-r--r--drivers/cxl/core/mbox.c226
-rw-r--r--drivers/cxl/core/memdev.c1
-rw-r--r--drivers/cxl/core/pci.c12
-rw-r--r--drivers/cxl/core/pmem.c42
-rw-r--r--drivers/cxl/core/port.c92
-rw-r--r--drivers/cxl/core/region.c867
-rw-r--r--drivers/cxl/core/trace.h480
-rw-r--r--drivers/cxl/cxl.h73
-rw-r--r--drivers/cxl/cxlmem.h180
-rw-r--r--drivers/cxl/cxlpci.h6
-rw-r--r--drivers/cxl/pci.c239
-rw-r--r--drivers/cxl/pmem.c25
-rw-r--r--drivers/cxl/port.c112
17 files changed, 2195 insertions, 208 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 9e709ecba50f..ff4e78117b31 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -104,12 +104,22 @@ config CXL_SUSPEND
depends on SUSPEND && CXL_MEM
config CXL_REGION
- bool
+ bool "CXL: Region Support"
default CXL_BUS
# For MAX_PHYSMEM_BITS
depends on SPARSEMEM
select MEMREGION
select GET_FREE_REGION
+ help
+ Enable the CXL core to enumerate and provision CXL regions. A CXL
+ region is defined by one or more CXL expanders that decode a given
+ system-physical address range. For CXL regions established by
+ platform-firmware this option enables memory error handling to
+ identify the devices participating in a given interleaved memory
+ range. Otherwise, platform-firmware managed CXL is enabled by being
+ placed in the system address map and does not need a driver.
+
+ If unsure say 'y'
config CXL_REGION_INVALIDATION_TEST
bool "CXL: Region Cache Management Bypass (TEST)"
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 6927149f2a16..7e1765b09e04 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -731,9 +731,9 @@ static void __exit cxl_acpi_exit(void)
cxl_bus_drain();
}
-module_init(cxl_acpi_init);
+/* load before dax_hmem sees 'Soft Reserved' CXL ranges */
+subsys_initcall(cxl_acpi_init);
module_exit(cxl_acpi_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(ACPI);
-MODULE_SOFTDEP("pre: cxl_pmem");
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 8c04672dca56..cde475e13216 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -11,15 +11,18 @@ extern struct attribute_group cxl_base_attribute_group;
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
+extern struct device_attribute dev_attr_create_ram_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
+extern const struct device_type cxl_dax_region_type;
extern const struct device_type cxl_region_type;
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
+#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
#else
@@ -37,6 +40,7 @@ static inline void cxl_region_exit(void)
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
+#define CXL_DAX_REGION_TYPE(x) NULL
#endif
struct cxl_send_command;
@@ -56,9 +60,6 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
extern struct rw_semaphore cxl_dpa_rwsem;
-bool is_switch_decoder(struct device *dev);
-struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
-
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index dcc16d7cb8f3..80eccae6ba9e 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -279,7 +279,7 @@ success:
return 0;
}
-static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
{
@@ -295,6 +295,7 @@ static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, CXL);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
@@ -676,6 +677,14 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
port->commit_end--;
cxld->flags &= ~CXL_DECODER_F_ENABLE;
+ /* Userspace is now responsible for reconfiguring this decoder */
+ if (is_endpoint_decoder(&cxld->dev)) {
+ struct cxl_endpoint_decoder *cxled;
+
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ cxled->state = CXL_DECODER_STATE_MANUAL;
+ }
+
return 0;
}
@@ -783,6 +792,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
return rc;
}
*dpa_base += dpa_size + skip;
+
+ cxled->state = CXL_DECODER_STATE_AUTO;
+
return 0;
}
@@ -826,7 +838,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxled = cxl_endpoint_decoder_alloc(port);
if (IS_ERR(cxled)) {
dev_warn(&port->dev,
- "Failed to allocate the decoder\n");
+ "Failed to allocate decoder%d.%d\n",
+ port->id, i);
return PTR_ERR(cxled);
}
cxld = &cxled->cxld;
@@ -836,7 +849,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxlsd = cxl_switch_decoder_alloc(port, target_count);
if (IS_ERR(cxlsd)) {
dev_warn(&port->dev,
- "Failed to allocate the decoder\n");
+ "Failed to allocate decoder%d.%d\n",
+ port->id, i);
return PTR_ERR(cxlsd);
}
cxld = &cxlsd->cxld;
@@ -844,13 +858,16 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base);
if (rc) {
+ dev_warn(&port->dev,
+ "Failed to initialize decoder%d.%d\n",
+ port->id, i);
put_device(&cxld->dev);
return rc;
}
rc = add_hdm_decoder(port, cxld, target_map);
if (rc) {
dev_warn(&port->dev,
- "Failed to add decoder to port\n");
+ "Failed to add decoder%d.%d\n", port->id, i);
return rc;
}
}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index fd3b13b0fb7f..fc7631bb1c24 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -3,11 +3,13 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/security.h>
#include <linux/debugfs.h>
+#include <linux/ktime.h>
#include <linux/mutex.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
+#include "trace.h"
static bool cxl_raw_allow_all;
@@ -742,6 +744,203 @@ out:
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ *
+ * Compared against the record header UUID in cxl_event_trace_record().
+ */
+static const uuid_t gen_media_event_uuid =
+ UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
+
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ *
+ * Compared against the record header UUID in cxl_event_trace_record().
+ */
+static const uuid_t dram_event_uuid =
+ UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
+ 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ *
+ * Compared against the record header UUID in cxl_event_trace_record().
+ */
+static const uuid_t mem_mod_event_uuid =
+ UUID_INIT(0xfe927475, 0xdd59, 0x4339,
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
+
+/*
+ * Emit the tracepoint that matches the UUID found in @record's header.
+ * Records whose UUID is not one of the known event types are reported
+ * through the generic event tracepoint.
+ */
+static void cxl_event_trace_record(const struct device *dev,
+				   enum cxl_event_log_type type,
+				   struct cxl_event_record_raw *record)
+{
+	uuid_t *uuid = &record->hdr.id;
+
+	if (uuid_equal(uuid, &gen_media_event_uuid)) {
+		trace_cxl_general_media(dev, type,
+					(struct cxl_event_gen_media *)record);
+		return;
+	}
+
+	if (uuid_equal(uuid, &dram_event_uuid)) {
+		trace_cxl_dram(dev, type, (struct cxl_event_dram *)record);
+		return;
+	}
+
+	if (uuid_equal(uuid, &mem_mod_event_uuid)) {
+		trace_cxl_memory_module(dev, type,
+					(struct cxl_event_mem_module *)record);
+		return;
+	}
+
+	/* For unknown record types print just the header */
+	trace_cxl_generic_event(dev, type, record);
+}
+
+/*
+ * Clear the event records that a Get Event Records command returned.
+ *
+ * @cxlds: device state used to issue the mailbox commands
+ * @log: event log the records in @get_pl were read from
+ * @get_pl: Get Event Records output providing the record count and handles
+ *
+ * Handles are submitted in batches of at most CXL_CLEAR_EVENT_MAX_HANDLES,
+ * or fewer when the mailbox payload size cannot carry that many.
+ *
+ * Return: 0 on success, -ENOMEM if the payload cannot be allocated, or the
+ * error returned by cxl_internal_send_cmd().
+ */
+static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
+				  enum cxl_event_log_type log,
+				  struct cxl_get_event_payload *get_pl)
+{
+	struct cxl_mbox_clear_event_payload *payload;
+	u16 total = le16_to_cpu(get_pl->record_count);
+	u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
+	size_t pl_size = struct_size(payload, handles, max_handles);
+	struct cxl_mbox_cmd mbox_cmd;
+	u16 cnt;
+	int rc = 0;
+	int i;
+
+	/* Payload size may limit the max handles */
+	if (pl_size > cxlds->payload_size) {
+		max_handles = (cxlds->payload_size - sizeof(*payload)) /
+			      sizeof(__le16);
+		pl_size = struct_size(payload, handles, max_handles);
+	}
+
+	payload = kvzalloc(pl_size, GFP_KERNEL);
+	if (!payload)
+		return -ENOMEM;
+
+	*payload = (struct cxl_mbox_clear_event_payload) {
+		.event_log = log,
+	};
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
+		.payload_in = payload,
+		.size_in = pl_size,
+	};
+
+	/*
+	 * Clear Event Records uses u8 for the handle cnt while Get Event
+	 * Record can return up to 0xffff records.
+	 */
+	i = 0;
+	for (cnt = 0; cnt < total; cnt++) {
+		payload->handles[i] = get_pl->records[cnt].hdr.handle;
+		/*
+		 * Log the handle that was just queued. Advancing the index
+		 * first would print the next (unset) slot and, for the last
+		 * slot of a batch, read past the end of the handle array.
+		 */
+		dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n",
+			log, le16_to_cpu(payload->handles[i]));
+		i++;
+
+		/* Batch is full: send it and start filling from slot 0 */
+		if (i == max_handles) {
+			payload->nr_recs = i;
+			rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+			if (rc)
+				goto free_pl;
+			i = 0;
+		}
+	}
+
+	/* Clear what is left if any */
+	if (i) {
+		payload->nr_recs = i;
+		/* Partial batch: shrink the input to the handles in use */
+		mbox_cmd.size_in = struct_size(payload, handles, i);
+		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	}
+
+free_pl:
+	kvfree(payload);
+	return rc;
+}
+
+/*
+ * Drain one event log: repeatedly fetch records into the per-device event
+ * buffer, trace each record, then clear them, until the device reports no
+ * more records or a mailbox command fails. The whole sequence runs under
+ * the event log lock.
+ */
+static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
+				    enum cxl_event_log_type type)
+{
+	struct cxl_get_event_payload *payload;
+	struct cxl_mbox_cmd mbox_cmd;
+	u8 log_type = type;
+	u16 nr_rec;
+
+	mutex_lock(&cxlds->event.log_lock);
+	payload = cxlds->event.buf;
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
+		.payload_in = &log_type,
+		.size_in = sizeof(log_type),
+		.payload_out = payload,
+		.size_out = cxlds->payload_size,
+		.min_out = struct_size(payload, records, 0),
+	};
+
+	for (;;) {
+		int rc, i;
+
+		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+		if (rc) {
+			dev_err_ratelimited(cxlds->dev,
+				"Event log '%d': Failed to query event records : %d",
+				type, rc);
+			break;
+		}
+
+		/* An empty response means the log has been drained */
+		nr_rec = le16_to_cpu(payload->record_count);
+		if (!nr_rec)
+			break;
+
+		i = 0;
+		while (i < nr_rec) {
+			cxl_event_trace_record(cxlds->dev, type,
+					       &payload->records[i]);
+			i++;
+		}
+
+		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
+			trace_cxl_overflow(cxlds->dev, type, payload);
+
+		rc = cxl_clear_event_record(cxlds, type, payload);
+		if (rc) {
+			dev_err_ratelimited(cxlds->dev,
+				"Event log '%d': Failed to clear events : %d",
+				type, rc);
+			break;
+		}
+	}
+
+	mutex_unlock(&cxlds->event.log_lock);
+}
+
+/**
+ * cxl_mem_get_event_records - Get Event Records from the device
+ * @cxlds: The device data for the operation
+ * @status: Event Status register value identifying which events are available.
+ *
+ * For every log flagged in @status, fetch the available event records,
+ * report them as trace events, and clear them. Logs are processed in the
+ * order fatal, fail, warn, info.
+ *
+ * See CXL rev 3.0 @8.2.9.2.2 Get Event Records
+ * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
+ */
+void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status)
+{
+	static const struct {
+		u32 mask;
+		enum cxl_event_log_type log;
+	} logs[] = {
+		{ CXLDEV_EVENT_STATUS_FATAL, CXL_EVENT_TYPE_FATAL },
+		{ CXLDEV_EVENT_STATUS_FAIL, CXL_EVENT_TYPE_FAIL },
+		{ CXLDEV_EVENT_STATUS_WARN, CXL_EVENT_TYPE_WARN },
+		{ CXLDEV_EVENT_STATUS_INFO, CXL_EVENT_TYPE_INFO },
+	};
+	int idx;
+
+	dev_dbg(cxlds->dev, "Reading event logs: %x\n", status);
+
+	for (idx = 0; idx < ARRAY_SIZE(logs); idx++) {
+		if (status & logs[idx].mask)
+			cxl_mem_get_records_log(cxlds, logs[idx].log);
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
+
/**
* cxl_mem_get_partition_info - Get partition info
* @cxlds: The device data for the operation
@@ -882,6 +1081,32 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
+/*
+ * Send the current wall-clock time (nanoseconds, little-endian) to the
+ * device with the Set Timestamp mailbox command.
+ *
+ * Returns 0 on success or when the device reports the command as
+ * unsupported, otherwise the error from cxl_internal_send_cmd().
+ */
+int cxl_set_timestamp(struct cxl_dev_state *cxlds)
+{
+	struct cxl_mbox_set_timestamp_in pi = {
+		.timestamp = cpu_to_le64(ktime_get_real_ns()),
+	};
+	struct cxl_mbox_cmd mbox_cmd = {
+		.opcode = CXL_MBOX_OP_SET_TIMESTAMP,
+		.size_in = sizeof(pi),
+		.payload_in = &pi,
+	};
+	int rc;
+
+	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	if (!rc)
+		return 0;
+
+	/*
+	 * Command is optional. Devices may have another way of providing
+	 * a timestamp, or may return all 0s in timestamp fields.
+	 * Don't report an error if this command isn't supported
+	 */
+	if (mbox_cmd.return_code == CXL_MBOX_CMD_RC_UNSUPPORTED)
+		return 0;
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
+
struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
struct cxl_dev_state *cxlds;
@@ -893,6 +1118,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
}
mutex_init(&cxlds->mbox_mutex);
+ mutex_init(&cxlds->event.log_lock);
cxlds->dev = dev;
return cxlds;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 12bd9ddaba22..0af8856936dc 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -246,6 +246,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
if (rc < 0)
goto err;
cxlmd->id = rc;
+ cxlmd->depth = -1;
dev = &cxlmd->dev;
device_initialize(dev);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 1d1492440287..2b463f107cb5 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -214,11 +214,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}
-static bool range_contains(struct range *r1, struct range *r2)
-{
- return r1->start <= r2->start && r1->end >= r2->end;
-}
-
/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, void *arg)
{
@@ -684,8 +679,11 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
/* If multiple errors, log header points to first error from ctrl reg */
if (hweight32(status) > 1) {
- addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+ void __iomem *rcc_addr =
+ cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ readl(rcc_addr)));
} else {
fe = status;
}
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index f3d2169b6731..c2e4b1093788 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -227,34 +227,16 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb,
return cxl_nvd;
}
-static void cxl_nvd_unregister(void *_cxl_nvd)
+static void cxlmd_release_nvdimm(void *_cxlmd)
{
- struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
- struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_memdev *cxlmd = _cxlmd;
+ struct cxl_nvdimm *cxl_nvd = cxlmd->cxl_nvd;
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
- /*
- * Either the bridge is in ->remove() context under the device_lock(),
- * or cxlmd_release_nvdimm() is cancelling the bridge's release action
- * for @cxl_nvd and doing it itself (while manually holding the bridge
- * lock).
- */
- device_lock_assert(&cxl_nvb->dev);
cxl_nvd->cxlmd = NULL;
cxlmd->cxl_nvd = NULL;
+ cxlmd->cxl_nvb = NULL;
device_unregister(&cxl_nvd->dev);
-}
-
-static void cxlmd_release_nvdimm(void *_cxlmd)
-{
- struct cxl_memdev *cxlmd = _cxlmd;
- struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
-
- device_lock(&cxl_nvb->dev);
- if (cxlmd->cxl_nvd)
- devm_release_action(&cxl_nvb->dev, cxl_nvd_unregister,
- cxlmd->cxl_nvd);
- device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
}
@@ -293,22 +275,6 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
- /*
- * The two actions below arrange for @cxl_nvd to be deleted when either
- * the top-level PMEM bridge goes down, or the endpoint device goes
- * through ->remove().
- */
- device_lock(&cxl_nvb->dev);
- if (cxl_nvb->dev.driver)
- rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister,
- cxl_nvd);
- else
- rc = -ENXIO;
- device_unlock(&cxl_nvb->dev);
-
- if (rc)
- goto err_alloc;
-
/* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 609aa6801b14..724190418698 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -46,6 +46,8 @@ static int cxl_device_id(struct device *dev)
return CXL_DEVICE_NVDIMM;
if (dev->type == CXL_PMEM_REGION_TYPE())
return CXL_DEVICE_PMEM_REGION;
+ if (dev->type == CXL_DAX_REGION_TYPE())
+ return CXL_DEVICE_DAX_REGION;
if (is_cxl_port(dev)) {
if (is_cxl_root(to_cxl_port(dev)))
return CXL_DEVICE_ROOT;
@@ -180,17 +182,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- switch (cxled->mode) {
- case CXL_DECODER_RAM:
- return sysfs_emit(buf, "ram\n");
- case CXL_DECODER_PMEM:
- return sysfs_emit(buf, "pmem\n");
- case CXL_DECODER_NONE:
- return sysfs_emit(buf, "none\n");
- case CXL_DECODER_MIXED:
- default:
- return sysfs_emit(buf, "mixed\n");
- }
+ return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode));
}
static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
@@ -304,6 +296,7 @@ static struct attribute *cxl_decoder_root_attrs[] = {
&dev_attr_cap_type3.attr,
&dev_attr_target_list.attr,
SET_CXL_REGION_ATTR(create_pmem_region)
+ SET_CXL_REGION_ATTR(create_ram_region)
SET_CXL_REGION_ATTR(delete_region)
NULL,
};
@@ -315,6 +308,13 @@ static bool can_create_pmem(struct cxl_root_decoder *cxlrd)
return (cxlrd->cxlsd.cxld.flags & flags) == flags;
}
+/* True when the root decoder has both the TYPE3 and RAM flags set */
+static bool can_create_ram(struct cxl_root_decoder *cxlrd)
+{
+	const unsigned long required = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_RAM;
+	unsigned long present = cxlrd->cxlsd.cxld.flags & required;
+
+	return present == required;
+}
+
static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
@@ -323,7 +323,11 @@ static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *
if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd))
return 0;
- if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd))
+ if (a == CXL_REGION_ATTR(create_ram_region) && !can_create_ram(cxlrd))
+ return 0;
+
+ if (a == CXL_REGION_ATTR(delete_region) &&
+ !(can_create_pmem(cxlrd) || can_create_ram(cxlrd)))
return 0;
return a->mode;
@@ -444,6 +448,7 @@ bool is_endpoint_decoder(struct device *dev)
{
return dev->type == &cxl_decoder_endpoint_type;
}
+EXPORT_SYMBOL_NS_GPL(is_endpoint_decoder, CXL);
bool is_root_decoder(struct device *dev)
{
@@ -455,6 +460,7 @@ bool is_switch_decoder(struct device *dev)
{
return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type;
}
+EXPORT_SYMBOL_NS_GPL(is_switch_decoder, CXL);
struct cxl_decoder *to_cxl_decoder(struct device *dev)
{
@@ -482,6 +488,7 @@ struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev)
return NULL;
return container_of(dev, struct cxl_switch_decoder, cxld.dev);
}
+EXPORT_SYMBOL_NS_GPL(to_cxl_switch_decoder, CXL);
static void cxl_ep_release(struct cxl_ep *ep)
{
@@ -1207,6 +1214,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
get_device(&endpoint->dev);
dev_set_drvdata(dev, endpoint);
+ cxlmd->depth = endpoint->depth;
return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
@@ -1241,50 +1249,55 @@ static void reap_dports(struct cxl_port *port)
}
}
+/*
+ * Search key that cxl_detach_ep() hands to bus_find_device(); the match
+ * callback port_has_memdev() accepts a port whose depth equals @depth and
+ * for which cxl_ep_load() finds @cxlmd.
+ */
+struct detach_ctx {
+ struct cxl_memdev *cxlmd;
+ int depth;
+};
+
+/*
+ * bus_find_device() match callback: returns 1 when @dev is a CXL port at
+ * the depth requested in @data (a struct detach_ctx) and cxl_ep_load()
+ * finds the requested memdev on it, 0 otherwise.
+ */
+static int port_has_memdev(struct device *dev, const void *data)
+{
+	const struct detach_ctx *want = data;
+	struct cxl_port *candidate;
+
+	if (is_cxl_port(dev)) {
+		candidate = to_cxl_port(dev);
+		if (candidate->depth == want->depth)
+			return cxl_ep_load(candidate, want->cxlmd) ? 1 : 0;
+	}
+
+	return 0;
+}
+
static void cxl_detach_ep(void *data)
{
struct cxl_memdev *cxlmd = data;
- struct device *iter;
- for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) {
- struct device *dport_dev = grandparent(iter);
+ for (int i = cxlmd->depth - 1; i >= 1; i--) {
struct cxl_port *port, *parent_port;
+ struct detach_ctx ctx = {
+ .cxlmd = cxlmd,
+ .depth = i,
+ };
+ struct device *dev;
struct cxl_ep *ep;
bool died = false;
- if (!dport_dev)
- break;
-
- port = find_cxl_port(dport_dev, NULL);
- if (!port)
- continue;
-
- if (is_cxl_root(port)) {
- put_device(&port->dev);
+ dev = bus_find_device(&cxl_bus_type, NULL, &ctx,
+ port_has_memdev);
+ if (!dev)
continue;
- }
+ port = to_cxl_port(dev);
parent_port = to_cxl_port(port->dev.parent);
device_lock(&parent_port->dev);
- if (!parent_port->dev.driver) {
- /*
- * The bottom-up race to delete the port lost to a
- * top-down port disable, give up here, because the
- * parent_port ->remove() will have cleaned up all
- * descendants.
- */
- device_unlock(&parent_port->dev);
- put_device(&port->dev);
- continue;
- }
-
device_lock(&port->dev);
ep = cxl_ep_load(port, cxlmd);
dev_dbg(&cxlmd->dev, "disconnect %s from %s\n",
ep ? dev_name(ep->ep) : "", dev_name(&port->dev));
cxl_ep_remove(port, ep);
if (ep && !port->dead && xa_empty(&port->endpoints) &&
- !is_cxl_root(parent_port)) {
+ !is_cxl_root(parent_port) && parent_port->dev.driver) {
/*
* This was the last ep attached to a dynamically
* enumerated port. Block new cxl_add_ep() and garbage
@@ -1620,6 +1633,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
}
cxlrd->calc_hb = calc_hb;
+ mutex_init(&cxlrd->range_lock);
cxld = &cxlsd->cxld;
cxld->dev.type = &cxl_decoder_root_type;
@@ -2003,6 +2017,6 @@ static void cxl_core_exit(void)
debugfs_remove_recursive(cxl_debugfs);
}
-module_init(cxl_core_init);
+subsys_initcall(cxl_core_init);
module_exit(cxl_core_exit);
MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3482a9e6d2f2..f29028148806 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uuid.h>
+#include <linux/sort.h>
#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
@@ -45,7 +46,10 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
- rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
+ if (cxlr->mode != CXL_DECODER_PMEM)
+ rc = sysfs_emit(buf, "\n");
+ else
+ rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
up_read(&cxl_region_rwsem);
return rc;
@@ -131,7 +135,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_port *iter = cxled_to_port(cxled);
struct cxl_ep *ep;
- int rc;
+ int rc = 0;
while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
iter = to_cxl_port(iter->dev.parent);
@@ -143,7 +147,8 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
- rc = cxld->reset(cxld);
+ if (cxld->reset)
+ rc = cxld->reset(cxld);
if (rc)
return rc;
}
@@ -201,7 +206,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr)
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
- cxld->reset(cxld);
+ if (cxld->reset)
+ cxld->reset(cxld);
}
cxled->cxld.reset(&cxled->cxld);
@@ -300,8 +306,12 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
struct device *dev = kobj_to_dev(kobj);
struct cxl_region *cxlr = to_cxl_region(dev);
+ /*
+ * Support tooling that expects to find a 'uuid' attribute for all
+ * regions regardless of mode.
+ */
if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
- return 0;
+ return 0444;
return a->mode;
}
@@ -458,6 +468,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(resource);
+/* sysfs 'mode' attribute: emit the name of the region's decoder mode */
+static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	const char *name = cxl_decoder_mode_name(to_cxl_region(dev)->mode);
+
+	return sysfs_emit(buf, "%s\n", name);
+}
+static DEVICE_ATTR_RO(mode);
+
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
@@ -508,7 +527,12 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr)
if (device_is_registered(&cxlr->dev))
lockdep_assert_held_write(&cxl_region_rwsem);
if (p->res) {
- remove_resource(p->res);
+ /*
+ * Autodiscovered regions may not have been able to insert their
+ * resource.
+ */
+ if (p->res->parent)
+ remove_resource(p->res);
kfree(p->res);
p->res = NULL;
}
@@ -585,6 +609,7 @@ static struct attribute *cxl_region_attrs[] = {
&dev_attr_interleave_granularity.attr,
&dev_attr_resource.attr,
&dev_attr_size.attr,
+ &dev_attr_mode.attr,
NULL,
};
@@ -1006,10 +1031,10 @@ static int cxl_port_setup_targets(struct cxl_port *port,
int i, distance;
/*
- * Passthrough ports impose no distance requirements between
+ * Passthrough decoders impose no distance requirements between
* peers
*/
- if (port->nr_dports == 1)
+ if (cxl_rr->nr_targets == 1)
distance = 0;
else
distance = p->nr_targets / cxl_rr->nr_targets;
@@ -1088,12 +1113,35 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
- cxld->interleave_ways = iw;
- cxld->interleave_granularity = ig;
- cxld->hpa_range = (struct range) {
- .start = p->res->start,
- .end = p->res->end,
- };
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (cxld->interleave_ways != iw ||
+ cxld->interleave_granularity != ig ||
+ cxld->hpa_range.start != p->res->start ||
+ cxld->hpa_range.end != p->res->end ||
+ ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
+ dev_err(&cxlr->dev,
+ "%s:%s %s expected iw: %d ig: %d %pr\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ __func__, iw, ig, p->res);
+ dev_err(&cxlr->dev,
+ "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ __func__, cxld->interleave_ways,
+ cxld->interleave_granularity,
+ (cxld->flags & CXL_DECODER_F_ENABLE) ?
+ "enabled" :
+ "disabled",
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return -ENXIO;
+ }
+ } else {
+ cxld->interleave_ways = iw;
+ cxld->interleave_granularity = ig;
+ cxld->hpa_range = (struct range) {
+ .start = p->res->start,
+ .end = p->res->end,
+ };
+ }
dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
dev_name(&port->dev), iw, ig);
add_target:
@@ -1104,7 +1152,17 @@ add_target:
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
return -ENXIO;
}
- cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
+ dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxlsd->cxld.dev),
+ dev_name(ep->dport->dport),
+ cxl_rr->nr_targets_set);
+ return -ENXIO;
+ }
+ } else
+ cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
inc = 1;
out_target_set:
cxl_rr->nr_targets_set += inc;
@@ -1146,6 +1204,13 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
struct cxl_ep *ep;
int i;
+ /*
+ * In the auto-discovery case skip automatic teardown since the
+ * address space is already active
+ */
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
+ return;
+
for (i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
cxlmd = cxled_to_memdev(cxled);
@@ -1178,8 +1243,8 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
iter = to_cxl_port(iter->dev.parent);
/*
- * Descend the topology tree programming targets while
- * looking for conflicts.
+ * Descend the topology tree programming / validating
+ * targets while looking for conflicts.
*/
for (ep = cxl_ep_load(iter, cxlmd); iter;
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
@@ -1194,29 +1259,13 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
return 0;
}
-static int cxl_region_attach(struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled, int pos)
+static int cxl_region_validate_position(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ int pos)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *ep_port, *root_port, *iter;
struct cxl_region_params *p = &cxlr->params;
- struct cxl_dport *dport;
- int i, rc = -ENXIO;
-
- if (cxled->mode == CXL_DECODER_DEAD) {
- dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
- return -ENODEV;
- }
-
- /* all full of members, or interleave config not established? */
- if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
- dev_dbg(&cxlr->dev, "region already active\n");
- return -EBUSY;
- } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
- dev_dbg(&cxlr->dev, "interleave config missing\n");
- return -ENXIO;
- }
+ int i;
if (pos < 0 || pos >= p->interleave_ways) {
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
@@ -1255,6 +1304,256 @@ static int cxl_region_attach(struct cxl_region *cxlr,
}
}
+ return 0;
+}
+
+static int cxl_region_attach_position(struct cxl_region *cxlr,
+ struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled,
+ const struct cxl_dport *dport, int pos)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *iter;
+ int rc;
+
+ if (cxlrd->calc_hb(cxlrd, pos) != dport) {
+ dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ dev_name(&cxlrd->cxlsd.cxld.dev));
+ return -ENXIO;
+ }
+
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent)) {
+ rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (rc)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent))
+ cxl_port_detach_region(iter, cxlr, cxled);
+ return rc;
+}
+
+static int cxl_region_attach_auto(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+
+ if (cxled->state != CXL_DECODER_STATE_AUTO) {
+ dev_err(&cxlr->dev,
+ "%s: unable to add decoder to autodetected region\n",
+ dev_name(&cxled->cxld.dev));
+ return -EINVAL;
+ }
+
+ if (pos >= 0) {
+ dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
+ dev_name(&cxled->cxld.dev), pos);
+ return -EINVAL;
+ }
+
+ if (p->nr_targets >= p->interleave_ways) {
+ dev_err(&cxlr->dev, "%s: no more target slots available\n",
+ dev_name(&cxled->cxld.dev));
+ return -ENXIO;
+ }
+
+ /*
+ * Temporarily record the endpoint decoder into the target array. Yes,
+ * this means that userspace can view devices in the wrong position
+ * before the region activates, and must be careful to understand when
+ * it might be racing region autodiscovery.
+ */
+ pos = p->nr_targets;
+ p->targets[pos] = cxled;
+ cxled->pos = pos;
+ p->nr_targets++;
+
+ return 0;
+}
+
+static struct cxl_port *next_port(struct cxl_port *port)
+{
+ if (!port->parent_dport)
+ return NULL;
+ return port->parent_dport->port;
+}
+
+static int decoder_match_range(struct device *dev, void *data)
+{
+ struct cxl_endpoint_decoder *cxled = data;
+ struct cxl_switch_decoder *cxlsd;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range);
+}
+
+static void find_positions(const struct cxl_switch_decoder *cxlsd,
+ const struct cxl_port *iter_a,
+ const struct cxl_port *iter_b, int *a_pos,
+ int *b_pos)
+{
+ int i;
+
+ for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) {
+ if (cxlsd->target[i] == iter_a->parent_dport)
+ *a_pos = i;
+ else if (cxlsd->target[i] == iter_b->parent_dport)
+ *b_pos = i;
+ if (*a_pos >= 0 && *b_pos >= 0)
+ break;
+ }
+}
+
+static int cmp_decode_pos(const void *a, const void *b)
+{
+ struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
+ struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
+ struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
+ struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
+ struct cxl_port *port_a = cxled_to_port(cxled_a);
+ struct cxl_port *port_b = cxled_to_port(cxled_b);
+ struct cxl_port *iter_a, *iter_b, *port = NULL;
+ struct cxl_switch_decoder *cxlsd;
+ struct device *dev;
+ int a_pos, b_pos;
+ unsigned int seq;
+
+ /* Exit early if any prior sorting failed */
+ if (cxled_a->pos < 0 || cxled_b->pos < 0)
+ return 0;
+
+ /*
+ * Walk up the hierarchy to find a shared port, find the decoder that
+ * maps the range, compare the relative position of those dport
+ * mappings.
+ */
+ for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
+ struct cxl_port *next_a, *next_b;
+
+ next_a = next_port(iter_a);
+ if (!next_a)
+ break;
+
+ for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
+ next_b = next_port(iter_b);
+ if (next_a != next_b)
+ continue;
+ port = next_a;
+ break;
+ }
+
+ if (port)
+ break;
+ }
+
+ if (!port) {
+ dev_err(cxlmd_a->dev.parent,
+ "failed to find shared port with %s\n",
+ dev_name(cxlmd_b->dev.parent));
+ goto err;
+ }
+
+ dev = device_find_child(&port->dev, cxled_a, decoder_match_range);
+ if (!dev) {
+ struct range *range = &cxled_a->cxld.hpa_range;
+
+ dev_err(port->uport,
+ "failed to find decoder that maps %#llx-%#llx\n",
+ range->start, range->end);
+ goto err;
+ }
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ do {
+ seq = read_seqbegin(&cxlsd->target_lock);
+ find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
+ } while (read_seqretry(&cxlsd->target_lock, seq));
+
+ put_device(dev);
+
+ if (a_pos < 0 || b_pos < 0) {
+ dev_err(port->uport,
+ "failed to find shared decoder for %s and %s\n",
+ dev_name(cxlmd_a->dev.parent),
+ dev_name(cxlmd_b->dev.parent));
+ goto err;
+ }
+
+ dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent),
+ a_pos - b_pos < 0 ? "before" : "after",
+ dev_name(cxlmd_b->dev.parent));
+
+ return a_pos - b_pos;
+err:
+ cxled_a->pos = -1;
+ return 0;
+}
+
+static int cxl_region_sort_targets(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int i, rc = 0;
+
+ sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
+ NULL);
+
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+ /*
+ * Record that sorting failed, but still continue to restore
+ * cxled->pos with its ->targets[] position so that follow-on
+ * code paths can reliably do p->targets[cxled->pos] to
+ * self-reference their entry.
+ */
+ if (cxled->pos < 0)
+ rc = -ENXIO;
+ cxled->pos = i;
+ }
+
+ dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
+ return rc;
+}
+
+static int cxl_region_attach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_port *ep_port, *root_port;
+ struct cxl_dport *dport;
+ int rc = -ENXIO;
+
+ if (cxled->mode != cxlr->mode) {
+ dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
+ dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
+ return -EINVAL;
+ }
+
+ if (cxled->mode == CXL_DECODER_DEAD) {
+ dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
+ return -ENODEV;
+ }
+
+ /* all full of members, or interleave config not established? */
+ if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "region already active\n");
+ return -EBUSY;
+ } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "interleave config missing\n");
+ return -ENXIO;
+ }
+
ep_port = cxled_to_port(cxled);
root_port = cxlrd_to_port(cxlrd);
dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
@@ -1265,13 +1564,6 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -ENXIO;
}
- if (cxlrd->calc_hb(cxlrd, pos) != dport) {
- dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
- dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
- dev_name(&cxlrd->cxlsd.cxld.dev));
- return -ENXIO;
- }
-
if (cxled->cxld.target_type != cxlr->type) {
dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
@@ -1295,13 +1587,58 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -EINVAL;
}
- for (iter = ep_port; !is_cxl_root(iter);
- iter = to_cxl_port(iter->dev.parent)) {
- rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ int i;
+
+ rc = cxl_region_attach_auto(cxlr, cxled, pos);
if (rc)
- goto err;
+ return rc;
+
+ /* await more targets to arrive... */
+ if (p->nr_targets < p->interleave_ways)
+ return 0;
+
+ /*
+ * All targets are here, which implies all PCI enumeration that
+ * affects this region has been completed. Walk the topology to
+ * sort the devices into their relative region decode position.
+ */
+ rc = cxl_region_sort_targets(cxlr);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ ep_port = cxled_to_port(cxled);
+ dport = cxl_find_dport_by_dev(root_port,
+ ep_port->host_bridge);
+ rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
+ dport, i);
+ if (rc)
+ return rc;
+ }
+
+ rc = cxl_region_setup_targets(cxlr);
+ if (rc)
+ return rc;
+
+ /*
+ * If target setup succeeds in the autodiscovery case
+ * then the region is already committed.
+ */
+ p->state = CXL_CONFIG_COMMIT;
+
+ return 0;
}
+ rc = cxl_region_validate_position(cxlr, cxled, pos);
+ if (rc)
+ return rc;
+
+ rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
+ if (rc)
+ return rc;
+
p->targets[pos] = cxled;
cxled->pos = pos;
p->nr_targets++;
@@ -1324,10 +1661,8 @@ static int cxl_region_attach(struct cxl_region *cxlr,
err_decrement:
p->nr_targets--;
-err:
- for (iter = ep_port; !is_cxl_root(iter);
- iter = to_cxl_port(iter->dev.parent))
- cxl_port_detach_region(iter, cxlr, cxled);
+ cxled->pos = -1;
+ p->targets[pos] = NULL;
return rc;
}
@@ -1399,31 +1734,25 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
up_write(&cxl_region_rwsem);
}
-static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
+static int attach_target(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ unsigned int state)
{
- struct device *dev;
- int rc;
-
- dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder);
- if (!dev)
- return -ENODEV;
-
- if (!is_endpoint_decoder(dev)) {
- put_device(dev);
- return -EINVAL;
- }
+ int rc = 0;
- rc = down_write_killable(&cxl_region_rwsem);
+ if (state == TASK_INTERRUPTIBLE)
+ rc = down_write_killable(&cxl_region_rwsem);
+ else
+ down_write(&cxl_region_rwsem);
if (rc)
- goto out;
+ return rc;
+
down_read(&cxl_dpa_rwsem);
- rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
+ rc = cxl_region_attach(cxlr, cxled, pos);
if (rc == 0)
set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
-out:
- put_device(dev);
return rc;
}
@@ -1461,8 +1790,23 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
if (sysfs_streq(buf, "\n"))
rc = detach_target(cxlr, pos);
- else
- rc = attach_target(cxlr, buf, pos);
+ else {
+ struct device *dev;
+
+ dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
+ if (!dev)
+ return -ENODEV;
+
+ if (!is_endpoint_decoder(dev)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
+ TASK_INTERRUPTIBLE);
+out:
+ put_device(dev);
+ }
if (rc < 0)
return rc;
@@ -1666,6 +2010,15 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
struct device *dev;
int rc;
+ switch (mode) {
+ case CXL_DECODER_RAM:
+ case CXL_DECODER_PMEM:
+ break;
+ default:
+ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
+ return ERR_PTR(-EINVAL);
+ }
+
cxlr = cxl_region_alloc(cxlrd, id);
if (IS_ERR(cxlr))
return cxlr;
@@ -1694,12 +2047,38 @@ err:
return ERR_PTR(rc);
}
+static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
+{
+ return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+}
+
static ssize_t create_pmem_region_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ return __create_region_show(to_cxl_root_decoder(dev), buf);
+}
- return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+static ssize_t create_ram_region_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return __create_region_show(to_cxl_root_decoder(dev), buf);
+}
+
+static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
+ enum cxl_decoder_mode mode, int id)
+{
+ int rc;
+
+ rc = memregion_alloc(GFP_KERNEL);
+ if (rc < 0)
+ return ERR_PTR(rc);
+
+ if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
+ memregion_free(rc);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER);
}
static ssize_t create_pmem_region_store(struct device *dev,
@@ -1708,29 +2087,39 @@ static ssize_t create_pmem_region_store(struct device *dev,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_region *cxlr;
- int id, rc;
+ int rc, id;
rc = sscanf(buf, "region%d\n", &id);
if (rc != 1)
return -EINVAL;
- rc = memregion_alloc(GFP_KERNEL);
- if (rc < 0)
- return rc;
+ cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
+ if (IS_ERR(cxlr))
+ return PTR_ERR(cxlr);
- if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
- memregion_free(rc);
- return -EBUSY;
- }
+ return len;
+}
+DEVICE_ATTR_RW(create_pmem_region);
- cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM,
- CXL_DECODER_EXPANDER);
+static ssize_t create_ram_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ struct cxl_region *cxlr;
+ int rc, id;
+
+ rc = sscanf(buf, "region%d\n", &id);
+ if (rc != 1)
+ return -EINVAL;
+
+ cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
if (IS_ERR(cxlr))
return PTR_ERR(cxlr);
return len;
}
-DEVICE_ATTR_RW(create_pmem_region);
+DEVICE_ATTR_RW(create_ram_region);
static ssize_t region_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1891,6 +2280,75 @@ out:
return cxlr_pmem;
}
+static void cxl_dax_region_release(struct device *dev)
+{
+ struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
+
+ kfree(cxlr_dax);
+}
+
+static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+const struct device_type cxl_dax_region_type = {
+ .name = "cxl_dax_region",
+ .release = cxl_dax_region_release,
+ .groups = cxl_dax_region_attribute_groups,
+};
+
+static bool is_cxl_dax_region(struct device *dev)
+{
+ return dev->type == &cxl_dax_region_type;
+}
+
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
+ "not a cxl_dax_region device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_dax_region, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
+
+static struct lock_class_key cxl_dax_region_key;
+
+static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_dax_region *cxlr_dax;
+ struct device *dev;
+
+ down_read(&cxl_region_rwsem);
+ if (p->state != CXL_CONFIG_COMMIT) {
+ cxlr_dax = ERR_PTR(-ENXIO);
+ goto out;
+ }
+
+ cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
+ if (!cxlr_dax) {
+ cxlr_dax = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ cxlr_dax->hpa_range.start = p->res->start;
+ cxlr_dax->hpa_range.end = p->res->end;
+
+ dev = &cxlr_dax->dev;
+ cxlr_dax->cxlr = cxlr;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
+ device_set_pm_not_required(dev);
+ dev->parent = &cxlr->dev;
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_dax_region_type;
+out:
+ up_read(&cxl_region_rwsem);
+
+ return cxlr_dax;
+}
+
static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
@@ -1975,6 +2433,227 @@ err_bridge:
return rc;
}
+static void cxlr_dax_unregister(void *_cxlr_dax)
+{
+ struct cxl_dax_region *cxlr_dax = _cxlr_dax;
+
+ device_unregister(&cxlr_dax->dev);
+}
+
+static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
+{
+ struct cxl_dax_region *cxlr_dax;
+ struct device *dev;
+ int rc;
+
+ cxlr_dax = cxl_dax_region_alloc(cxlr);
+ if (IS_ERR(cxlr_dax))
+ return PTR_ERR(cxlr_dax);
+
+ dev = &cxlr_dax->dev;
+ rc = dev_set_name(dev, "dax_region%d", cxlr->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+ dev_name(dev));
+
+ return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
+ cxlr_dax);
+err:
+ put_device(dev);
+ return rc;
+}
+
+static int match_decoder_by_range(struct device *dev, void *data)
+{
+ struct range *r1, *r2 = data;
+ struct cxl_root_decoder *cxlrd;
+
+ if (!is_root_decoder(dev))
+ return 0;
+
+ cxlrd = to_cxl_root_decoder(dev);
+ r1 = &cxlrd->cxlsd.cxld.hpa_range;
+ return range_contains(r1, r2);
+}
+
+static int match_region_by_range(struct device *dev, void *data)
+{
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ struct range *r = data;
+ int rc = 0;
+
+ if (!is_cxl_region(dev))
+ return 0;
+
+ cxlr = to_cxl_region(dev);
+ p = &cxlr->params;
+
+ down_read(&cxl_region_rwsem);
+ if (p->res && p->res->start == r->start && p->res->end == r->end)
+ rc = 1;
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxlrd_to_port(cxlrd);
+ struct range *hpa = &cxled->cxld.hpa_range;
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ struct resource *res;
+ int rc;
+
+ do {
+ cxlr = __create_region(cxlrd, cxled->mode,
+ atomic_read(&cxlrd->region_id));
+ } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
+
+ if (IS_ERR(cxlr)) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s failed assign region: %ld\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, PTR_ERR(cxlr));
+ return cxlr;
+ }
+
+ down_write(&cxl_region_rwsem);
+ p = &cxlr->params;
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s autodiscovery interrupted\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__);
+ rc = -EBUSY;
+ goto err;
+ }
+
+ set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
+
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
+ dev_name(&cxlr->dev));
+ rc = insert_resource(cxlrd->res, res);
+ if (rc) {
+ /*
+ * Platform-firmware may not have split resources like "System
+ * RAM" on CXL window boundaries; see cxl_region_iomem_release()
+ */
+ dev_warn(cxlmd->dev.parent,
+ "%s:%s: %s %s cannot insert resource\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, dev_name(&cxlr->dev));
+ }
+
+ p->res = res;
+ p->interleave_ways = cxled->cxld.interleave_ways;
+ p->interleave_granularity = cxled->cxld.interleave_granularity;
+ p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
+
+ rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
+ if (rc)
+ goto err;
+
+ dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
+ dev_name(&cxlr->dev), p->res, p->interleave_ways,
+ p->interleave_granularity);
+
+ /* ...to match put_device() in cxl_add_to_region() */
+ get_device(&cxlr->dev);
+ up_write(&cxl_region_rwsem);
+
+ return cxlr;
+
+err:
+ up_write(&cxl_region_rwsem);
+ devm_release_action(port->uport, unregister_region, cxlr);
+ return ERR_PTR(rc);
+}
+
+int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct range *hpa = &cxled->cxld.hpa_range;
+ struct cxl_decoder *cxld = &cxled->cxld;
+ struct device *cxlrd_dev, *region_dev;
+ struct cxl_root_decoder *cxlrd;
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ bool attach = false;
+ int rc;
+
+ cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
+ match_decoder_by_range);
+ if (!cxlrd_dev) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s no CXL window for range %#llx:%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxld->dev),
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return -ENXIO;
+ }
+
+ cxlrd = to_cxl_root_decoder(cxlrd_dev);
+
+ /*
+ * Ensure that if multiple threads race to construct_region() for @hpa
+ * one does the construction and the others add to that.
+ */
+ mutex_lock(&cxlrd->range_lock);
+ region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
+ match_region_by_range);
+ if (!region_dev) {
+ cxlr = construct_region(cxlrd, cxled);
+ region_dev = &cxlr->dev;
+ } else
+ cxlr = to_cxl_region(region_dev);
+ mutex_unlock(&cxlrd->range_lock);
+
+ rc = PTR_ERR_OR_ZERO(cxlr);
+ if (rc)
+ goto out;
+
+ attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
+
+ down_read(&cxl_region_rwsem);
+ p = &cxlr->params;
+ attach = p->state == CXL_CONFIG_COMMIT;
+ up_read(&cxl_region_rwsem);
+
+ if (attach) {
+ /*
+ * If device_attach() fails the range may still be active via
+ * the platform-firmware memory map, otherwise the driver for
+ * regions is local to this file, so driver matching can't fail.
+ */
+ if (device_attach(&cxlr->dev) < 0)
+ dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
+ p->res);
+ }
+
+ put_device(region_dev);
+out:
+ put_device(cxlrd_dev);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
+
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
@@ -1999,6 +2678,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
return 0;
}
+static int is_system_ram(struct resource *res, void *arg)
+{
+ struct cxl_region *cxlr = arg;
+ struct cxl_region_params *p = &cxlr->params;
+
+ dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
+ return 1;
+}
+
static int cxl_region_probe(struct device *dev)
{
struct cxl_region *cxlr = to_cxl_region(dev);
@@ -2032,6 +2720,17 @@ out:
switch (cxlr->mode) {
case CXL_DECODER_PMEM:
return devm_cxl_add_pmem_region(cxlr);
+ case CXL_DECODER_RAM:
+ /*
+ * The region can not be managed by CXL if any portion of
+ * it is already online as 'System RAM'
+ */
+ if (walk_iomem_res_desc(IORES_DESC_NONE,
+ IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+ p->res->start, p->res->end, cxlr,
+ is_system_ram) > 0)
+ return 0;
+ return devm_cxl_add_dax_region(cxlr);
default:
dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
cxlr->mode);
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 20ca2fe2ca8e..c72ef9321cfe 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -6,8 +6,11 @@
#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _CXL_EVENTS_H
-#include <cxl.h>
#include <linux/tracepoint.h>
+#include <asm-generic/unaligned.h>
+
+#include <cxl.h>
+#include <cxlmem.h>
#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
@@ -103,6 +106,481 @@ TRACE_EVENT(cxl_aer_correctable_error,
)
);
+#define cxl_event_log_type_str(type) \
+ __print_symbolic(type, \
+ { CXL_EVENT_TYPE_INFO, "Informational" }, \
+ { CXL_EVENT_TYPE_WARN, "Warning" }, \
+ { CXL_EVENT_TYPE_FAIL, "Failure" }, \
+ { CXL_EVENT_TYPE_FATAL, "Fatal" })
+
+TRACE_EVENT(cxl_overflow,
+
+ TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+ struct cxl_get_event_payload *payload),
+
+ TP_ARGS(dev, log, payload),
+
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(dev))
+ __field(int, log)
+ __field(u64, first_ts)
+ __field(u64, last_ts)
+ __field(u16, count)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(dev));
+ __entry->log = log;
+ __entry->count = le16_to_cpu(payload->overflow_err_count);
+ __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp);
+ __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp);
+ ),
+
+ TP_printk("%s: log=%s : %u records from %llu to %llu",
+ __get_str(dev_name), cxl_event_log_type_str(__entry->log),
+ __entry->count, __entry->first_ts, __entry->last_ts)
+
+);
+
+/*
+ * Common Event Record Format
+ * CXL 3.0 section 8.2.9.2.1; Table 8-42
+ */
+#define CXL_EVENT_RECORD_FLAG_PERMANENT BIT(2)
+#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3)
+#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4)
+#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5)
+#define show_hdr_flags(flags) __print_flags(flags, " | ", \
+ { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \
+ { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \
+ { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \
+ { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \
+)
+
+/*
+ * Define macros for the common header of each CXL event.
+ *
+ * Tracepoints using these macros must do 3 things:
+ *
+ * 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry
+ * 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign;
+ * pass the dev, log, and CXL event header
+ * 3) Use CXL_EVT_TP_printk() instead of TP_printk()
+ *
+ * See the cxl_generic_event tracepoint as an example.
+ */
+#define CXL_EVT_TP_entry \
+ __string(dev_name, dev_name(dev)) \
+ __field(int, log) \
+ __field_struct(uuid_t, hdr_uuid) \
+ __field(u32, hdr_flags) \
+ __field(u16, hdr_handle) \
+ __field(u16, hdr_related_handle) \
+ __field(u64, hdr_timestamp) \
+ __field(u8, hdr_length) \
+ __field(u8, hdr_maint_op_class)
+
+#define CXL_EVT_TP_fast_assign(dev, l, hdr) \
+ __assign_str(dev_name, dev_name(dev)); \
+ __entry->log = (l); \
+ memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \
+ __entry->hdr_length = (hdr).length; \
+ __entry->hdr_flags = get_unaligned_le24((hdr).flags); \
+ __entry->hdr_handle = le16_to_cpu((hdr).handle); \
+ __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \
+ __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \
+ __entry->hdr_maint_op_class = (hdr).maint_op_class
+
+#define CXL_EVT_TP_printk(fmt, ...) \
+ TP_printk("%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \
+ "handle=%x related_handle=%x maint_op_class=%u" \
+ " : " fmt, \
+ __get_str(dev_name), cxl_event_log_type_str(__entry->log), \
+ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\
+ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \
+ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \
+ ##__VA_ARGS__)
+
+TRACE_EVENT(cxl_generic_event,
+
+ TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+ struct cxl_event_record_raw *rec),
+
+ TP_ARGS(dev, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ __array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+ memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH);
+ ),
+
+ CXL_EVT_TP_printk("%s",
+ __print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
+);
+
+/*
+ * Physical Address field masks
+ *
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ *
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CXL_DPA_FLAGS_MASK 0x3F
+#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK)
+
+#define CXL_DPA_VOLATILE BIT(0)
+#define CXL_DPA_NOT_REPAIRABLE BIT(1)
+#define show_dpa_flags(flags) __print_flags(flags, "|", \
+ { CXL_DPA_VOLATILE, "VOLATILE" }, \
+ { CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \
+)
+
+/*
+ * General Media Event Record - GMER
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0)
+#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
+#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2)
+#define show_event_desc_flags(flags) __print_flags(flags, "|", \
+ { CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \
+ { CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \
+ { CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \
+)
+
+#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00
+#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01
+#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02
+#define show_mem_event_type(type) __print_symbolic(type, \
+ { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
+ { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
+ { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
+)
+
+#define CXL_GMER_TRANS_UNKNOWN 0x00
+#define CXL_GMER_TRANS_HOST_READ 0x01
+#define CXL_GMER_TRANS_HOST_WRITE 0x02
+#define CXL_GMER_TRANS_HOST_SCAN_MEDIA 0x03
+#define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06
+#define show_trans_type(type) __print_symbolic(type, \
+ { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \
+ { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \
+ { CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \
+ { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \
+ { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \
+ { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \
+ { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
+)
+
+#define CXL_GMER_VALID_CHANNEL BIT(0)
+#define CXL_GMER_VALID_RANK BIT(1)
+#define CXL_GMER_VALID_DEVICE BIT(2)
+#define CXL_GMER_VALID_COMPONENT BIT(3)
+#define show_valid_flags(flags) __print_flags(flags, "|", \
+ { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
+ { CXL_GMER_VALID_RANK, "RANK" }, \
+ { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
+ { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
+)
+
+TRACE_EVENT(cxl_general_media,
+
+ TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+ struct cxl_event_gen_media *rec),
+
+ TP_ARGS(dev, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ /* General Media */
+ __field(u64, dpa)
+ __field(u8, descriptor)
+ __field(u8, type)
+ __field(u8, transaction_type)
+ __field(u8, channel)
+ __field(u32, device)
+ __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
+ __field(u16, validity_flags)
+ /* Following are out of order to pack trace record */
+ __field(u8, rank)
+ __field(u8, dpa_flags)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+ /* General Media */
+ __entry->dpa = le64_to_cpu(rec->phys_addr);
+ __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
+ /* Mask after flags have been parsed */
+ __entry->dpa &= CXL_DPA_MASK;
+ __entry->descriptor = rec->descriptor;
+ __entry->type = rec->type;
+ __entry->transaction_type = rec->transaction_type;
+ __entry->channel = rec->channel;
+ __entry->rank = rec->rank;
+ __entry->device = get_unaligned_le24(rec->device);
+ memcpy(__entry->comp_id, &rec->component_id,
+ CXL_EVENT_GEN_MED_COMP_ID_SIZE);
+ __entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
+ ),
+
+ CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
+ "descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
+ "device=%x comp_id=%s validity_flags='%s'",
+ __entry->dpa, show_dpa_flags(__entry->dpa_flags),
+ show_event_desc_flags(__entry->descriptor),
+ show_mem_event_type(__entry->type),
+ show_trans_type(__entry->transaction_type),
+ __entry->channel, __entry->rank, __entry->device,
+ __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
+ show_valid_flags(__entry->validity_flags)
+ )
+);
+
+/*
+ * DRAM Event Record - DER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+/*
+ * DRAM Event Record defines many fields the same as the General Media Event
+ * Record. Reuse those definitions as appropriate.
+ */
+#define CXL_DER_VALID_CHANNEL BIT(0)
+#define CXL_DER_VALID_RANK BIT(1)
+#define CXL_DER_VALID_NIBBLE BIT(2)
+#define CXL_DER_VALID_BANK_GROUP BIT(3)
+#define CXL_DER_VALID_BANK BIT(4)
+#define CXL_DER_VALID_ROW BIT(5)
+#define CXL_DER_VALID_COLUMN BIT(6)
+#define CXL_DER_VALID_CORRECTION_MASK BIT(7)
+#define show_dram_valid_flags(flags) __print_flags(flags, "|", \
+ { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \
+ { CXL_DER_VALID_RANK, "RANK" }, \
+ { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \
+ { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \
+ { CXL_DER_VALID_BANK, "BANK" }, \
+ { CXL_DER_VALID_ROW, "ROW" }, \
+ { CXL_DER_VALID_COLUMN, "COLUMN" }, \
+ { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \
+)
+
+/*
+ * cxl_dram - trace a DRAM Event Record.
+ *
+ * The record's physical address is converted from little-endian and split:
+ * the bits selected by CXL_DPA_FLAGS_MASK are stored in dpa_flags and dpa
+ * keeps only the bits selected by CXL_DPA_MASK. The byte-array fields
+ * (validity flags, nibble mask, row, column) are read with the unaligned
+ * little-endian helpers and the correction mask is copied verbatim.
+ */
+TRACE_EVENT(cxl_dram,
+
+ TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+ struct cxl_event_dram *rec),
+
+ TP_ARGS(dev, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ /* DRAM */
+ __field(u64, dpa)
+ __field(u8, descriptor)
+ __field(u8, type)
+ __field(u8, transaction_type)
+ __field(u8, channel)
+ __field(u16, validity_flags)
+ __field(u16, column) /* Out of order to pack trace record */
+ __field(u32, nibble_mask)
+ __field(u32, row)
+ __array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
+ __field(u8, rank) /* Out of order to pack trace record */
+ __field(u8, bank_group) /* Out of order to pack trace record */
+ __field(u8, bank) /* Out of order to pack trace record */
+ __field(u8, dpa_flags) /* Out of order to pack trace record */
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+ /* DRAM */
+ __entry->dpa = le64_to_cpu(rec->phys_addr);
+ __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
+ __entry->dpa &= CXL_DPA_MASK;
+ __entry->descriptor = rec->descriptor;
+ __entry->type = rec->type;
+ __entry->transaction_type = rec->transaction_type;
+ __entry->validity_flags = get_unaligned_le16(rec->validity_flags);
+ __entry->channel = rec->channel;
+ __entry->rank = rec->rank;
+ __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask);
+ __entry->bank_group = rec->bank_group;
+ __entry->bank = rec->bank;
+ __entry->row = get_unaligned_le24(rec->row);
+ __entry->column = get_unaligned_le16(rec->column);
+ memcpy(__entry->cor_mask, &rec->correction_mask,
+ CXL_EVENT_DER_CORRECTION_MASK_SIZE);
+ ),
+
+ CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \
+ "transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
+ "bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
+ "validity_flags='%s'",
+ __entry->dpa, show_dpa_flags(__entry->dpa_flags),
+ show_event_desc_flags(__entry->descriptor),
+ show_mem_event_type(__entry->type),
+ show_trans_type(__entry->transaction_type),
+ __entry->channel, __entry->rank, __entry->nibble_mask,
+ __entry->bank_group, __entry->bank,
+ __entry->row, __entry->column,
+ __print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
+ show_dram_valid_flags(__entry->validity_flags)
+ )
+);
+
+/*
+ * Memory Module Event Record - MMER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00
+#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01
+#define CXL_MMER_LIFE_USED_CHANGE 0x02
+#define CXL_MMER_TEMP_CHANGE 0x03
+#define CXL_MMER_DATA_PATH_ERROR 0x04
+#define CXL_MMER_LSA_ERROR 0x05
+#define show_dev_evt_type(type) __print_symbolic(type, \
+ { CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
+ { CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
+ { CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
+ { CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
+ { CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
+ { CXL_MMER_LSA_ERROR, "LSA Error" } \
+)
+
+/*
+ * Device Health Information - DHI
+ *
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0)
+#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1)
+#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2)
+#define show_health_status_flags(flags) __print_flags(flags, "|", \
+ { CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
+ { CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
+ { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
+)
+
+#define CXL_DHI_MS_NORMAL 0x00
+#define CXL_DHI_MS_NOT_READY 0x01
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST 0x02
+#define CXL_DHI_MS_ALL_DATA_LOST 0x03
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS 0x04
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN 0x05
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT 0x06
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS 0x07
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN 0x08
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT 0x09
+#define show_media_status(ms) __print_symbolic(ms, \
+ { CXL_DHI_MS_NORMAL, \
+ "Normal" }, \
+ { CXL_DHI_MS_NOT_READY, \
+ "Not Ready" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOST, \
+ "Write Persistency Lost" }, \
+ { CXL_DHI_MS_ALL_DATA_LOST, \
+ "All Data Lost" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \
+ "Write Persistency Loss in the Event of Power Loss" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \
+ "Write Persistency Loss in Event of Shutdown" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \
+ "Write Persistency Loss Imminent" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \
+ "All Data Loss in Event of Power Loss" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \
+ "All Data loss in the Event of Shutdown" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \
+ "All Data Loss Imminent" } \
+)
+
+#define CXL_DHI_AS_NORMAL 0x0
+#define CXL_DHI_AS_WARNING 0x1
+#define CXL_DHI_AS_CRITICAL 0x2
+#define show_two_bit_status(as) __print_symbolic(as, \
+ { CXL_DHI_AS_NORMAL, "Normal" }, \
+ { CXL_DHI_AS_WARNING, "Warning" }, \
+ { CXL_DHI_AS_CRITICAL, "Critical" } \
+)
+#define show_one_bit_status(as) __print_symbolic(as, \
+ { CXL_DHI_AS_NORMAL, "Normal" }, \
+ { CXL_DHI_AS_WARNING, "Warning" } \
+)
+
+/*
+ * Extract the individual status fields packed into the add_status byte of
+ * the Device Health Information. The argument is parenthesized so that a
+ * compound expression (e.g. "a | b") is masked as a whole rather than being
+ * split by operator precedence.
+ */
+#define CXL_DHI_AS_LIFE_USED(as)		((as) & 0x3)
+#define CXL_DHI_AS_DEV_TEMP(as)			(((as) & 0xC) >> 2)
+#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)		(((as) & 0x10) >> 4)
+#define CXL_DHI_AS_COR_PER_ERR_CNT(as)		(((as) & 0x20) >> 5)
+
+/*
+ * cxl_memory_module - trace a Memory Module Event Record.
+ *
+ * Captures the event type plus the embedded device health info. add_status
+ * is stored raw and decoded at print time with the CXL_DHI_AS_*() helpers;
+ * the multi-byte counters and the temperature are read with the unaligned
+ * little-endian helpers.
+ */
+TRACE_EVENT(cxl_memory_module,
+
+ TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+ struct cxl_event_mem_module *rec),
+
+ TP_ARGS(dev, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+
+ /* Memory Module Event */
+ __field(u8, event_type)
+
+ /* Device Health Info */
+ __field(u8, health_status)
+ __field(u8, media_status)
+ __field(u8, life_used)
+ __field(u32, dirty_shutdown_cnt)
+ __field(u32, cor_vol_err_cnt)
+ __field(u32, cor_per_err_cnt)
+ __field(s16, device_temp)
+ __field(u8, add_status)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+ /* Memory Module Event */
+ __entry->event_type = rec->event_type;
+
+ /* Device Health Info */
+ __entry->health_status = rec->info.health_status;
+ __entry->media_status = rec->info.media_status;
+ __entry->life_used = rec->info.life_used;
+ __entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
+ __entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
+ __entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
+ __entry->device_temp = get_unaligned_le16(rec->info.device_temp);
+ __entry->add_status = rec->info.add_status;
+ ),
+
+ CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
+ "as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
+ "as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
+ "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
+ show_dev_evt_type(__entry->event_type),
+ show_health_status_flags(__entry->health_status),
+ show_media_status(__entry->media_status),
+ show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
+ show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
+ show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
+ show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
+ __entry->life_used, __entry->device_temp,
+ __entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
+ __entry->cor_per_err_cnt
+ )
+);
+
#endif /* _CXL_EVENTS_H */
#define TRACE_INCLUDE_FILE trace
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 197ecffce4d0..64258442622a 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -157,6 +157,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
+/* CXL 3.0 8.2.8.3.1 Event Status Register */
+#define CXLDEV_DEV_EVENT_STATUS_OFFSET 0x00
+#define CXLDEV_EVENT_STATUS_INFO BIT(0)
+#define CXLDEV_EVENT_STATUS_WARN BIT(1)
+#define CXLDEV_EVENT_STATUS_FAIL BIT(2)
+#define CXLDEV_EVENT_STATUS_FATAL BIT(3)
+
+#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO | \
+ CXLDEV_EVENT_STATUS_WARN | \
+ CXLDEV_EVENT_STATUS_FAIL | \
+ CXLDEV_EVENT_STATUS_FATAL)
+
+/* CXL rev 3.0 section 8.2.9.2.4; Table 8-52 */
+#define CXLDEV_EVENT_INT_MODE_MASK GENMASK(1, 0)
+#define CXLDEV_EVENT_INT_MSGNUM_MASK GENMASK(7, 4)
+
/* CXL 2.0 8.2.8.4 Mailbox Registers */
#define CXLDEV_MBOX_CAPS_OFFSET 0x00
#define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
@@ -262,6 +278,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
* cxl_decoder flags that define the type of memory / devices this
* decoder supports as well as configuration lock status See "CXL 2.0
* 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
+ * Additionally indicate whether decoder settings were autodetected or
+ * user customized.
*/
#define CXL_DECODER_F_RAM BIT(0)
#define CXL_DECODER_F_PMEM BIT(1)
@@ -321,12 +339,36 @@ enum cxl_decoder_mode {
CXL_DECODER_DEAD,
};
+/*
+ * Map a decoder mode to its short name ("none", "ram", "pmem" or "mixed").
+ * Any mode outside the CXL_DECODER_NONE..CXL_DECODER_MIXED range is reported
+ * as "mixed".
+ */
+static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
+{
+	static const char * const mode_names[] = {
+		[CXL_DECODER_NONE] = "none",
+		[CXL_DECODER_RAM] = "ram",
+		[CXL_DECODER_PMEM] = "pmem",
+		[CXL_DECODER_MIXED] = "mixed",
+	};
+
+	/* Out-of-table modes fall back to "mixed" */
+	if (mode < CXL_DECODER_NONE || mode > CXL_DECODER_MIXED)
+		return "mixed";
+
+	return mode_names[mode];
+}
+
+/*
+ * Track whether this decoder is reserved for region autodiscovery, or
+ * free for userspace provisioning.
+ */
+enum cxl_decoder_state {
+ CXL_DECODER_STATE_MANUAL,
+ CXL_DECODER_STATE_AUTO,
+};
+
/**
* struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
* @mode: which memory type / access-mode-partition this decoder targets
+ * @state: autodiscovery state
* @pos: interleave position in @cxld.region
*/
struct cxl_endpoint_decoder {
@@ -334,6 +376,7 @@ struct cxl_endpoint_decoder {
struct resource *dpa_res;
resource_size_t skip;
enum cxl_decoder_mode mode;
+ enum cxl_decoder_state state;
int pos;
};
@@ -367,6 +410,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
* @platform_data: platform specific configuration data
+ * @range_lock: sync region autodiscovery by address range
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
@@ -374,6 +418,7 @@ struct cxl_root_decoder {
atomic_t region_id;
cxl_calc_hb_fn calc_hb;
void *platform_data;
+ struct mutex range_lock;
struct cxl_switch_decoder cxlsd;
};
@@ -423,6 +468,13 @@ struct cxl_region_params {
*/
#define CXL_REGION_F_INCOHERENT 0
+/*
+ * Indicate whether this region has been assembled by autodetection or
+ * userspace assembly. Prevent endpoint decoders outside of automatic
+ * detection from being added to the region.
+ */
+#define CXL_REGION_F_AUTO 1
+
/**
* struct cxl_region - CXL region
* @dev: This region's device
@@ -478,6 +530,12 @@ struct cxl_pmem_region {
struct cxl_pmem_region_mapping mapping[];
};
+struct cxl_dax_region {
+ struct device dev;
+ struct cxl_region *cxlr;
+ struct range hpa_range;
+};
+
/**
* struct cxl_port - logical collection of upstream port devices and
* downstream port devices to construct a CXL memory
@@ -618,8 +676,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
+struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
+bool is_switch_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets,
@@ -670,6 +730,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_MEMORY_EXPANDER 5
#define CXL_DEVICE_REGION 6
#define CXL_DEVICE_PMEM_REGION 7
+#define CXL_DEVICE_DAX_REGION 8
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
@@ -686,6 +747,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
+int cxl_add_to_region(struct cxl_port *root,
+ struct cxl_endpoint_decoder *cxled);
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
#else
static inline bool is_cxl_pmem_region(struct device *dev)
{
@@ -695,6 +759,15 @@ static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
return NULL;
}
+static inline int cxl_add_to_region(struct cxl_port *root,
+ struct cxl_endpoint_decoder *cxled)
+{
+ return 0;
+}
+static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+ return NULL;
+}
#endif
/*
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2d85776236dd..64ede1a06eaf 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -4,6 +4,7 @@
#define __CXL_MEM_H__
#include <uapi/linux/cxl_mem.h>
#include <linux/cdev.h>
+#include <linux/uuid.h>
#include "cxl.h"
/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -38,6 +39,7 @@
* @cxl_nvb: coordinate removal of @cxl_nvd if present
* @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
* @id: id number of this memdev instance.
+ * @depth: endpoint port depth
*/
struct cxl_memdev {
struct device dev;
@@ -47,6 +49,7 @@ struct cxl_memdev {
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
int id;
+ int depth;
};
static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -79,6 +82,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
}
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+ resource_size_t base, resource_size_t len,
+ resource_size_t skipped);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
@@ -194,6 +200,34 @@ struct cxl_endpoint_dvsec_info {
};
/**
+ * Event Interrupt Policy
+ *
+ * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
+ */
+enum cxl_event_int_mode {
+ CXL_INT_NONE = 0x00,
+ CXL_INT_MSI_MSIX = 0x01,
+ CXL_INT_FW = 0x02
+};
+/*
+ * Mailbox payload holding one settings byte per event log. The driver
+ * decodes each byte with CXLDEV_EVENT_INT_MODE_MASK (compared against
+ * enum cxl_event_int_mode values) and CXLDEV_EVENT_INT_MSGNUM_MASK (used as
+ * the interrupt vector index).
+ */
+struct cxl_event_interrupt_policy {
+ u8 info_settings;
+ u8 warn_settings;
+ u8 failure_settings;
+ u8 fatal_settings;
+} __packed;
+
+/**
+ * struct cxl_event_state - Event log driver state
+ *
+ * @buf: Buffer to receive event data
+ * @log_lock: Serialize @buf and log use
+ */
+struct cxl_event_state {
+ struct cxl_get_event_payload *buf;
+ struct mutex log_lock;
+};
+
+/**
* struct cxl_dev_state - The driver device state
*
* cxl_dev_state represents the CXL driver/device state. It provides an
@@ -266,14 +300,21 @@ struct cxl_dev_state {
struct xarray doe_mbs;
+ struct cxl_event_state event;
+
int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
enum cxl_opcode {
CXL_MBOX_OP_INVALID = 0x0000,
CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
+ CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100,
+ CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101,
+ CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102,
+ CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103,
CXL_MBOX_OP_GET_FW_INFO = 0x0200,
CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
+ CXL_MBOX_OP_SET_TIMESTAMP = 0x0301,
CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
CXL_MBOX_OP_GET_LOG = 0x0401,
CXL_MBOX_OP_IDENTIFY = 0x4000,
@@ -347,6 +388,136 @@ struct cxl_mbox_identify {
u8 qos_telemetry_caps;
} __packed;
+/*
+ * Common Event Record Format
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+struct cxl_event_record_hdr {
+ uuid_t id;
+ u8 length;
+ u8 flags[3];
+ __le16 handle;
+ __le16 related_handle;
+ __le64 timestamp;
+ u8 maint_op_class;
+ u8 reserved[15];
+} __packed;
+
+#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
+struct cxl_event_record_raw {
+ struct cxl_event_record_hdr hdr;
+ u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
+} __packed;
+
+/*
+ * Get Event Records output payload
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-50
+ */
+#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0)
+#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1)
+struct cxl_get_event_payload {
+ u8 flags;
+ u8 reserved1;
+ __le16 overflow_err_count;
+ __le64 first_overflow_timestamp;
+ __le64 last_overflow_timestamp;
+ __le16 record_count;
+ u8 reserved2[10];
+ struct cxl_event_record_raw records[];
+} __packed;
+
+/*
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
+ */
+enum cxl_event_log_type {
+ CXL_EVENT_TYPE_INFO = 0x00,
+ CXL_EVENT_TYPE_WARN,
+ CXL_EVENT_TYPE_FAIL,
+ CXL_EVENT_TYPE_FATAL,
+ CXL_EVENT_TYPE_MAX
+};
+
+/*
+ * Clear Event Records input payload
+ * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
+ */
+struct cxl_mbox_clear_event_payload {
+ u8 event_log; /* enum cxl_event_log_type */
+ u8 clear_flags;
+ u8 nr_recs;
+ u8 reserved[3];
+ __le16 handles[];
+} __packed;
+#define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
+struct cxl_event_gen_media {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 device[3];
+ u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ u8 reserved[46];
+} __packed;
+
+/*
+ * DRAM Event Record - DER
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20
+struct cxl_event_dram {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 nibble_mask[3];
+ u8 bank_group;
+ u8 bank;
+ u8 row[3];
+ u8 column[2];
+ u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
+ u8 reserved[0x17];
+} __packed;
+
+/*
+ * Get Health Info Record
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+struct cxl_get_health_info {
+ u8 health_status;
+ u8 media_status;
+ u8 add_status;
+ u8 life_used;
+ u8 device_temp[2];
+ u8 dirty_shutdown_cnt[4];
+ u8 cor_vol_err_cnt[4];
+ u8 cor_per_err_cnt[4];
+} __packed;
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+struct cxl_event_mem_module {
+ struct cxl_event_record_hdr hdr;
+ u8 event_type;
+ struct cxl_get_health_info info;
+ u8 reserved[0x3d];
+} __packed;
+
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
@@ -372,6 +543,12 @@ struct cxl_mbox_set_partition_info {
#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
+/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
+struct cxl_mbox_set_timestamp_in {
+ __le64 timestamp;
+
+} __packed;
+
/**
* struct cxl_mem_command - Driver representation of a memory device command
* @info: Command information as it exists for the UAPI
@@ -440,6 +617,9 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
+void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
+int cxl_set_timestamp(struct cxl_dev_state *cxlds);
+
#ifdef CONFIG_CXL_SUSPEND
void cxl_mem_active_inc(void);
void cxl_mem_active_dec(void);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 77dbdb980b12..a8ea04f536ab 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -53,6 +53,12 @@
#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8)
#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16)
+/*
+ * NOTE: Currently all the functions which are enabled for CXL require their
+ * vectors to be in the first 16. Use this as the default max.
+ */
+#define CXL_PCI_DEFAULT_MAX_VECTORS 16
+
/* Register Block Identifier (RBI) */
enum cxl_regloc_type {
CXL_REGLOC_RBI_EMPTY = 0,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a509640994d7..60b23624d167 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -473,8 +473,234 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev)
return 0;
}
+/* devm action callback: release the buffer set up by cxl_mem_alloc_event_buf() */
+static void free_event_buf(void *buf)
+{
+ kvfree(buf);
+}
+
+/*
+ * There is a single buffer for reading event logs from the mailbox. All logs
+ * share this buffer protected by the cxlds->event.log_lock.
+ *
+ * The buffer is cxlds->payload_size bytes and its release is registered as a
+ * devm action on cxlds->dev.
+ */
+static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
+{
+ struct cxl_get_event_payload *buf;
+
+ buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ cxlds->event.buf = buf;
+
+ return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
+}
+
+/*
+ * Allocate between 1 and CXL_PCI_DEFAULT_MAX_VECTORS MSI-X/MSI vectors for
+ * @pdev. Returns 0 on success or -ENXIO when no vector could be allocated.
+ */
+static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
+{
+	int count;
+
+	/*
+	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
+	 * not generate INTx messages if that function participates in
+	 * CXL.cache or CXL.mem, hence only MSI-X / MSI are requested.
+	 *
+	 * No explicit pci_free_irq_vectors() is needed: despite not being a
+	 * pcim_* call, pci_alloc_irq_vectors() handles freeing the vectors
+	 * automatically. See pci_setup_msi_context().
+	 */
+	count = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
+				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
+	if (count >= 1)
+		return 0;
+
+	dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", count);
+	return -ENXIO;
+}
+
+struct cxl_dev_id {
+ struct cxl_dev_state *cxlds;
+};
+
+/*
+ * Threaded interrupt handler: keep draining event records until the event
+ * status register reports no pending log known to the driver.
+ */
+static irqreturn_t cxl_event_thread(int irq, void *id)
+{
+	struct cxl_dev_id *ctx = id;
+	struct cxl_dev_state *cxlds = ctx->cxlds;
+
+	while (true) {
+		u32 pending;
+
+		/*
+		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
+		 * ignore the reserved upper 32 bits
+		 */
+		pending = readl(cxlds->regs.status +
+				CXLDEV_DEV_EVENT_STATUS_OFFSET);
+		/* Ignore logs unknown to the driver */
+		pending &= CXLDEV_EVENT_STATUS_ALL;
+		if (!pending)
+			return IRQ_HANDLED;
+
+		cxl_mem_get_event_records(cxlds, pending);
+		cond_resched();
+	}
+}
+
+/*
+ * Request the threaded event interrupt described by one policy settings
+ * byte. Returns -ENXIO unless the mode field selects MSI/MSI-X; the message
+ * number field is used as the vector index passed to pci_irq_vector().
+ */
+static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
+{
+	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
+	u8 msgnum = FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting);
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	struct cxl_dev_id *dev_id;
+	int irq;
+
+	if (mode != CXL_INT_MSI_MSIX)
+		return -ENXIO;
+
+	/* dev_id must be globally unique and must contain the cxlds */
+	dev_id = devm_kzalloc(cxlds->dev, sizeof(*dev_id), GFP_KERNEL);
+	if (!dev_id)
+		return -ENOMEM;
+	dev_id->cxlds = cxlds;
+
+	irq = pci_irq_vector(pdev, msgnum);
+	if (irq < 0)
+		return irq;
+
+	return devm_request_threaded_irq(cxlds->dev, irq, NULL,
+					 cxl_event_thread,
+					 IRQF_SHARED | IRQF_ONESHOT, NULL,
+					 dev_id);
+}
+
+/*
+ * Read the device's current event interrupt policy into @policy with the
+ * CXL_MBOX_OP_GET_EVT_INT_POLICY mailbox command.
+ *
+ * Returns the result of cxl_internal_send_cmd(); a negative result is also
+ * logged.
+ */
+static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
+				    struct cxl_event_interrupt_policy *policy)
+{
+	struct cxl_mbox_cmd mbox_cmd = {
+		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
+		.payload_out = policy,
+		.size_out = sizeof(*policy),
+	};
+	int rc;
+
+	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	if (rc < 0)
+		/* Terminate the message so it is not merged with the next log line */
+		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d\n",
+			rc);
+
+	return rc;
+}
+
+/*
+ * Ask the device to deliver all four event logs via MSI/MSI-X, then read the
+ * policy back so that @policy reflects the device's final settings.
+ *
+ * Returns a negative error if the set command fails, otherwise the result of
+ * cxl_event_get_int_policy().
+ */
+static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
+				    struct cxl_event_interrupt_policy *policy)
+{
+	struct cxl_mbox_cmd mbox_cmd;
+	int rc;
+
+	*policy = (struct cxl_event_interrupt_policy) {
+		.info_settings = CXL_INT_MSI_MSIX,
+		.warn_settings = CXL_INT_MSI_MSIX,
+		.failure_settings = CXL_INT_MSI_MSIX,
+		.fatal_settings = CXL_INT_MSI_MSIX,
+	};
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
+		.payload_in = policy,
+		.size_in = sizeof(*policy),
+	};
+
+	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	if (rc < 0) {
+		/* Terminate the message so it is not merged with the next log line */
+		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d\n",
+			rc);
+		return rc;
+	}
+
+	/* Retrieve final interrupt settings */
+	return cxl_event_get_int_policy(cxlds, policy);
+}
+
+/*
+ * Switch every event log to MSI/MSI-X delivery, then request an interrupt
+ * for each log (Info, Warn, Failure, Fatal) from the settings read back from
+ * the device. Stops at, and returns, the first failure.
+ */
+static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+{
+ struct cxl_event_interrupt_policy policy;
+ int rc;
+
+ rc = cxl_event_config_msgnums(cxlds, &policy);
+ if (rc)
+ return rc;
+
+ rc = cxl_event_req_irq(cxlds, policy.info_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.warn_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.failure_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+/* True when the mode field of a policy settings byte is CXL_INT_FW */
+static bool cxl_event_int_is_fw(u8 setting)
+{
+	return FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) == CXL_INT_FW;
+}
+
+/*
+ * Set up event log handling for @cxlds: allocate the shared event buffer,
+ * verify that no log is still routed to firmware, wire up the interrupts and
+ * do an initial read of all logs. Does nothing (returns 0) when the host
+ * bridge does not grant native CXL error handling.
+ */
+static int cxl_event_config(struct pci_host_bridge *host_bridge,
+			    struct cxl_dev_state *cxlds)
+{
+	struct cxl_event_interrupt_policy policy;
+	bool fw_owned;
+	int rc;
+
+	/*
+	 * When BIOS maintains CXL error reporting control, it will process
+	 * event records. Only one agent can do so.
+	 */
+	if (!host_bridge->native_cxl_error)
+		return 0;
+
+	rc = cxl_mem_alloc_event_buf(cxlds);
+	if (rc)
+		return rc;
+
+	rc = cxl_event_get_int_policy(cxlds, &policy);
+	if (rc)
+		return rc;
+
+	fw_owned = cxl_event_int_is_fw(policy.info_settings) ||
+		   cxl_event_int_is_fw(policy.warn_settings) ||
+		   cxl_event_int_is_fw(policy.failure_settings) ||
+		   cxl_event_int_is_fw(policy.fatal_settings);
+	if (fw_owned) {
+		dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
+		return -EBUSY;
+	}
+
+	rc = cxl_event_irqsetup(cxlds);
+	if (rc)
+		return rc;
+
+	/* Pick up any records already pending in the logs */
+	cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
+
+	return 0;
+}
+
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
@@ -490,6 +716,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rc = pcim_enable_device(pdev);
if (rc)
return rc;
+ pci_set_master(pdev);
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
@@ -538,6 +765,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_set_timestamp(cxlds);
+ if (rc)
+ return rc;
+
rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
@@ -546,10 +777,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_alloc_irq_vectors(pdev);
+ if (rc)
+ return rc;
+
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
+ rc = cxl_event_config(host_bridge, cxlds);
+ if (rc)
+ return rc;
+
rc = cxl_pci_ras_unmask(pdev);
if (rc)
dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index eedefebc4283..71cfa1fdf902 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev)
return rc;
set_bit(NDD_LABELING, &flags);
+ set_bit(NDD_REGISTER_SYNC, &flags);
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
@@ -225,11 +226,35 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}
+/*
+ * bus_for_each_dev() callback: release the driver of every bound cxl_nvdimm
+ * whose memdev points at the bridge passed in @data. Always returns 0 so the
+ * iteration visits every device.
+ */
+static int detach_nvdimm(struct device *dev, void *data)
+{
+	bool release = false;
+
+	if (!is_cxl_nvdimm(dev))
+		return 0;
+
+	/* Decide under the device lock, release the driver after dropping it */
+	device_lock(dev);
+	if (dev->driver) {
+		struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
+
+		release = cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data;
+	}
+	device_unlock(dev);
+
+	if (release)
+		device_release_driver(dev);
+	return 0;
+}
+
static void unregister_nvdimm_bus(void *_cxl_nvb)
{
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
+ bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, detach_nvdimm);
+
cxl_nvb->nvdimm_bus = NULL;
nvdimm_bus_unregister(nvdimm_bus);
}
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 5453771bf330..d6c151dabaa7 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -30,57 +30,111 @@ static void schedule_detach(void *cxlmd)
schedule_cxl_memdev_detach(cxlmd);
}
-static int cxl_port_probe(struct device *dev)
+/*
+ * device_for_each_child() callback: try to add each enabled endpoint decoder
+ * in the CXL_DECODER_STATE_AUTO state to a region under @root. Always
+ * returns 0 so the iteration continues past failures.
+ */
+static int discover_region(struct device *dev, void *root)
+{
+	struct cxl_endpoint_decoder *cxled;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	if (!(cxled->cxld.flags & CXL_DECODER_F_ENABLE) ||
+	    cxled->state != CXL_DECODER_STATE_AUTO)
+		return 0;
+
+	/*
+	 * Region enumeration is opportunistic, if this add-event fails,
+	 * continue to the next endpoint decoder.
+	 */
+	if (cxl_add_to_region(root, cxled))
+		dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
+			cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
+
+	return 0;
+}
+
+static int cxl_switch_port_probe(struct cxl_port *port)
{
- struct cxl_port *port = to_cxl_port(dev);
struct cxl_hdm *cxlhdm;
int rc;
+ rc = devm_cxl_port_enumerate_dports(port);
+ if (rc < 0)
+ return rc;
- if (!is_cxl_endpoint(port)) {
- rc = devm_cxl_port_enumerate_dports(port);
- if (rc < 0)
- return rc;
- if (rc == 1)
- return devm_cxl_add_passthrough_decoder(port);
- }
+ if (rc == 1)
+ return devm_cxl_add_passthrough_decoder(port);
cxlhdm = devm_cxl_setup_hdm(port);
if (IS_ERR(cxlhdm))
return PTR_ERR(cxlhdm);
- if (is_cxl_endpoint(port)) {
- struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ return devm_cxl_enumerate_decoders(cxlhdm);
+}
+
+static int cxl_endpoint_port_probe(struct cxl_port *port)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_hdm *cxlhdm;
+ struct cxl_port *root;
+ int rc;
+
+ cxlhdm = devm_cxl_setup_hdm(port);
+ if (IS_ERR(cxlhdm))
+ return PTR_ERR(cxlhdm);
- /* Cache the data early to ensure is_visible() works */
- read_cdat_data(port);
+ /* Cache the data early to ensure is_visible() works */
+ read_cdat_data(port);
- get_device(&cxlmd->dev);
- rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
- if (rc)
- return rc;
+ get_device(&cxlmd->dev);
+ rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd);
+ if (rc)
+ return rc;
- rc = cxl_hdm_decode_init(cxlds, cxlhdm);
- if (rc)
- return rc;
+ rc = cxl_hdm_decode_init(cxlds, cxlhdm);
+ if (rc)
+ return rc;
- rc = cxl_await_media_ready(cxlds);
- if (rc) {
- dev_err(dev, "Media not active (%d)\n", rc);
- return rc;
- }
+ rc = cxl_await_media_ready(cxlds);
+ if (rc) {
+ dev_err(&port->dev, "Media not active (%d)\n", rc);
+ return rc;
}
rc = devm_cxl_enumerate_decoders(cxlhdm);
- if (rc) {
- dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
+ if (rc)
return rc;
- }
+
+ /*
+ * This can't fail in practice as CXL root exit unregisters all
+ * descendant ports and that in turn synchronizes with cxl_port_probe()
+ */
+ root = find_cxl_root(&cxlmd->dev);
+
+ /*
+ * Now that all endpoint decoders are successfully enumerated, try to
+ * assemble regions from committed decoders
+ */
+ device_for_each_child(&port->dev, root, discover_region);
+ put_device(&root->dev);
return 0;
}
+static int cxl_port_probe(struct device *dev)
+{
+ struct cxl_port *port = to_cxl_port(dev);
+
+ if (is_cxl_endpoint(port))
+ return cxl_endpoint_port_probe(port);
+ return cxl_switch_port_probe(port);
+}
+
static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)